diff options
author | Thadeu Lima de Souza Cascardo <cascardo@debian.org> | 2018-07-19 16:34:06 -0300 |
---|---|---|
committer | Thadeu Lima de Souza Cascardo <cascardo@debian.org> | 2018-07-19 16:34:06 -0300 |
commit | edad422003b3f542856d3d5dbf0fe0dc6727d4d4 (patch) | |
tree | e235c6836eb76dc59c987cd4506e60d660df6302 |
Import makedumpfile_1.6.4.orig.tar.gz
[dgit import orig makedumpfile_1.6.4.orig.tar.gz]
52 files changed, 32274 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0b78a96 --- /dev/null +++ b/.gitignore @@ -0,0 +1,80 @@ +# +# NOTE! Don't add files that are generated in specific +# subdirectories here. Add them in the ".gitignore" file +# in that subdirectory instead. +# +# NOTE! Please use 'git ls-files -i --exclude-standard' +# command after changing this file, to see if there are +# any tracked files which get ignored after the change. +# +# Normal rules +# +.* +*.o +*.o.* +*.a +*.s +*.ko +*.so +*.so.dbg +*.mod.c +*.i +*.lst +*.symtypes +*.order +modules.builtin +*.elf +*.bin +*.gz +*.bz2 +*.lzma +*.lzo +*.patch +*.gcno + +# +# Top-level generic files +# +/tags +/TAGS +/linux +/vmlinux +/vmlinuz +/System.map +/Module.markers +/Module.symvers + +# +# git files that we don't want to ignore even it they are dot-files +# +!.gitignore +!.mailmap + +# +# Generated include files +# +include/config +include/linux/version.h +include/generated + +# stgit generated dirs +patches-* + +# quilt's files +patches +series + +# cscope files +cscope.* +ncscope.* + +# gnu global files +GPATH +GRTAGS +GSYMS +GTAGS + +*.orig +*~ +\#*# +makedumpfile @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. 
(Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. 
To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/IMPLEMENTATION b/IMPLEMENTATION new file mode 100644 index 0000000..589c5bf --- /dev/null +++ b/IMPLEMENTATION @@ -0,0 +1,282 @@ +* The kdump-compressed format + makedumpfile provides two DUMPFILE formats (the ELF format and the + kdump-compressed format). By default, makedumpfile makes a DUMPFILE + in the kdump-compressed format. The kdump-compressed format is readable + only with the crash utility, and it can be smaller than the ELF format + because of the compression support. + + - The file structure + File offset + +------------------------------------------+ 0x0 + | main header (struct disk_dump_header) | + |------------------------------------------+ block 1 + | sub header (struct kdump_sub_header) | + |------------------------------------------+ block 2 + | 1st-bitmap | + |------------------------------------------+ block 2 + X blocks + | 2nd-bitmap | (aligned by block) + |------------------------------------------+ block 2 + 2 * X blocks + | page header for pfn 0 (struct page_desc) | (aligned by block) + | page header for pfn 1 (struct page_desc) | + | : | + | page header for pfn Z (struct page_desc) | + |------------------------------------------| (not aligned by block) + | page data (pfn 0) | + | page data (pfn 1) | + | : | + | page data (pfn Z) | + +------------------------------------------+ offset_eraseinfo + | erase mystruct2.mystruct1.var size 4 | + | erase mystruct2.mystruct1.ptr nullify | + | erase mystruct2.mystruct.array size 100 | + +------------------------------------------+ + + + - main header + The main header of the kdump 
compressed format is the almost same as the + one of diskdump. This header has the following members, and the member + signature and header_version are different from diskdump. + + struct disk_dump_header { + char signature[SIG_LEN]; /* = "KDUMP " */ + int header_version; /* Dump header version */ + struct new_utsname utsname; /* copy of system_utsname */ + struct timeval timestamp; /* Time stamp */ + unsigned int status; /* Above flags */ + int block_size; /* Size of a block in byte */ + int sub_hdr_size; /* Size of arch dependent + header in blocks */ + unsigned int bitmap_blocks; /* Size of Memory bitmap in + block */ + unsigned int max_mapnr; /* = max_mapnr, OBSOLETE! + 32bit only, full 64bit + in sub header. */ + unsigned int total_ram_blocks;/* Number of blocks should be + written */ + unsigned int device_blocks; /* Number of total blocks in + * the dump device */ + unsigned int written_blocks; /* Number of written blocks */ + unsigned int current_cpu; /* CPU# which handles dump */ + int nr_cpus; /* Number of CPUs */ + struct task_struct *tasks[0]; + }; + + - sub header + The sub header of the kdump compressed format is original. This header + has the member phys_base and dump_level. The member phys_base is for + an x86_64 relocatable kernel, and the member dump_level has '-d' option's + value of makedumpfile command. + + struct kdump_sub_header { + unsigned long phys_base; + int dump_level; /* header_version 1 and later */ + int split; /* header_version 2 and later */ + unsigned long start_pfn; /* header_version 2 and later, + OBSOLETE! 32bit only, full + 64bit in start_pfn_64. */ + unsigned long end_pfn; /* header_version 2 and later, + OBSOLETE! 32bit only, full + 64bit in end_pfn_64. 
*/ + off_t offset_vmcoreinfo;/* header_version 3 and later */ + unsigned long size_vmcoreinfo; /* header_version 3 and later */ + off_t offset_note; /* header_version 4 and later */ + unsigned long size_note; /* header_version 4 and later */ + off_t offset_eraseinfo; /* header_version 5 and later */ + unsigned long size_eraseinfo; /* header_version 5 and later */ + unsigned long long start_pfn_64; /* header_version 6 and later */ + unsigned long long end_pfn_64; /* header_version 6 and later */ + unsigned long long max_mapnr_64; /* header_version 6 and later */ + }; + + - 1st-bitmap + The bit of 1st-bitmap presents either a page on memory hole, or not. + If a page is on memory hole, the corresponding bit is off. Otherwise, + it is on. + + + - 2nd-bitmap + The bit of 2nd-bitmap presents either a dumpable page, or not. + If a page is on memory hole or excluded by makedumpfile command, the + corresponding bit is off. Otherwise, it is on. + + + - page header + There are page headers corresponding to dumpable pages. + This header presents the corresponding page information (compressed, or not. + etc.) + + typedef struct page_desc { + off_t offset; /* the offset of the page data*/ + unsigned int size; /* the size of this dump page */ + unsigned int flags; /* flags */ + unsigned long long page_flags; /* page flags */ + } page_desc_t; + + +* The ELF format + There are two different ELF format(ELF32, ELF64) for K-bit architectures (K=32,64). + Since they almost have the same behaviour in this situation, the following will use + ELF32 as a example. 
+ + - The file structure + + +---------------------------------+ + | elf_header (struct elf32_hdr) | + |---------------------------------+ + | PT_NOTE (struct elf32_phdr) | + |---------------------------------+ + | PT_LOAD(1) (struct elf32_phdr) | + | PT_LOAD(2) (struct elf32_phdr) | + | : | + | PT_LOAD(Z) (struct elf32_phdr) | + |---------------------------------+ + | NOTE | + |---------------------------------+ + | segment(1) | + | segment(2) | + | : | + | segment(Z) | + +---------------------------------+ + + - elf_header + This header is almost the same as a normal elf_header. The difference is that the + e_flags is used for indicating whether the dump file is complete or not. + 0x0 : complete, + 0x1 : incomplete + + typedef struct elf32_hdr{ + unsigned char e_ident[EI_NIDENT]; /* ELF "magic number" */ + Elf32_Half e_type; /* Object file type (CORE) */ + Elf32_Half e_machine; + Elf32_Word e_version; + Elf32_Addr e_entry; + Elf32_Off e_phoff; /* Program header table file offset */ + Elf32_Off e_shoff; + Elf32_Word e_flags; + Elf32_Half e_ehsize; /* Size of this header */ + Elf32_Half e_phentsize; /* Size of program headers */ + Elf32_Half e_phnum; /* Number of program headers */ + Elf32_Half e_shentsize; + Elf32_Half e_shnum; + Elf32_Half e_shstrndx; + } Elf32_Ehdr; + + - PT_NOTE and PT_LOAD + PT_NOTE corresponds to NOTE and PT_LOAD to segment. + They present the corresponding NOTE and segments information. 
+ + + typedef struct elf32_phdr{ + Elf32_Word p_type; + Elf32_Off p_offset; /* Segment file offset */ + Elf32_Addr p_vaddr; /* Segment virtual address */ + Elf32_Addr p_paddr; /* Segment physical address */ + Elf32_Word p_filesz; /* Segment size in file */ + Elf32_Word p_memsz; /* Segment size in memory */ + Elf32_Word p_flags; + Elf32_Word p_align; /* Segment alignment, file & memory */ + } Elf32_Phdr; + + - note + The note structure + + +------------------------------------+ + | note header 1 (struct elf32_note) | + | note header 2 (struct elf32_note) | + | : | + | note header N (struct elf32_note) | + |------------------------------------+ + | note data 1 | + | note data 2 | + | : | + | note data N | + +------------------------------------+ + + typedef struct elf32_note { + Elf32_Word n_namesz; /* Name size */ + Elf32_Word n_descsz; /* Content size */ + Elf32_Word n_type; /* Content type */ + } Elf32_Nhdr; + + - segments + The data dumped are all stored in segments and notes. + + +* The incomplete DUMPFILE + When generating DUMPFILE, if ENOSPACE error happens, the DUMPFILE will be + incomplete. + + - The incomplete kdump-compressed DUMPFILE + + the complete the incomplete + +-----------------------+ +-----------------------+ + | main header | | main header | have incomplete flag + |-----------------------+ |-----------------------+ + | sub header | | sub header | + |-----------------------+ |-----------------------+ + | 1st-bitmap | | 1st-bitmap | + |-----------------------+ |-----------------------+ + | 2nd-bitmap | | 2nd-bitmap | + |-----------------------+ |-----------------------+ + | page header for pfn 0 | | page header for pfn 0 | + | page header for pfn 1 | | page header for pfn 1 | + | : | | : | + | page header for pfn N | | page header for pfn N | The page headers after + | : | | | N don't exist. The + | page header for pfn Z | | | value of it is zero, + |-----------------------| |-----------------------| when try to read it. 
+ | page data (pfn 0) | | page data (pfn 0) | + | page data (pfn 1) | | page data (pfn 1) | + | : | | : | + | page data (pfn N) | | page data (pfn N) | + | : | +-----------------------+ + | page data (pfn Z) | + +-----------------------+ + + The incomplete flag is set into status of disk_dump_header by + status |= DUMP_DH_COMPRESSED_INCOMPLETE + DUMP_DH_COMPRESSED_INCOMPLETE : 0x8 + + The page header and page data are written in pairs. When writing page data + (pfn N+1), if ENOSPACE error happens, the page headers after N won't be + written either. + Since the data lost is filled with zero when it is read, the page_desc->offset + will also be zero. And zero page has its own offset not equal 0. So when reading + page from incomplete core, only the page lost by ENOSPACE errors has 0 in its + corresponding page descriptor's member offset. + + If there is no page data dumped into the DUMPFILE, the DUMPFILE can't be + analysed by crash. + + - The incomplete elf DUMPFILE + + the complete the incomplete + +-------------+ +-------------+ + | elf header | | elf header | have incomplete flag + |-------------+ |-------------+ + | PT_NOTE | | PT_NOTE | + | PT_LOAD(1) | | PT_LOAD(1) | + | : | | : | + | PT_LOAD(N) | | PT_LOAD(N) | The PT_LOAD after N don't exist. + | : | | | The value of it is zero, when try + | PT_LOAD(Z) | | | to read it. + |-------------+ |-------------+ + | NOTE | | NOTE | + |-------------+ |-------------+ + | segment(1) | | segment(1) | + | : | | : | + | segment(N) | | segment(N) | The segment(N) is incomplete. + | : | +-------------+ The segments after N don't exist. + | segment(Z) | + +-------------+ + + The incomplete flag is set into e_flags of elf_header by + e_flags |= DUMP_ELF_INCOMPLETE + DUMP_ELF_INCOMPLETE : 0x1 + + The PT_LOAD and segment are written in pairs. When writing segment(N) + , if ENOSPACE error happens, the PT_LOAD after N won't be written + either. 
+ + If there is no segment dumped into the DUMPFILE, the DUMPFILE can't be + analysed by crash. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..612b9d0 --- /dev/null +++ b/Makefile @@ -0,0 +1,104 @@ +# makedumpfile + +VERSION=1.6.4 +DATE=3 Jul 2018 + +# Honour the environment variable CC +ifeq ($(strip $CC),) +CC = gcc +endif + +CFLAGS = -g -O2 -Wall -D_FILE_OFFSET_BITS=64 \ + -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE \ + -DVERSION='"$(VERSION)"' -DRELEASE_DATE='"$(DATE)"' +CFLAGS_ARCH = -g -O2 -Wall -D_FILE_OFFSET_BITS=64 \ + -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE +# LDFLAGS = -L/usr/local/lib -I/usr/local/include + +HOST_ARCH := $(shell uname -m) +# Use TARGET as the target architecture if specified. +# Defaults to uname -m +ifeq ($(strip($TARGET)),) +TARGET := $(HOST_ARCH) +endif + +ARCH := $(shell echo ${TARGET} | sed -e s/i.86/x86/ -e s/sun4u/sparc64/ \ + -e s/arm.*/arm/ -e s/sa110/arm/ \ + -e s/s390x/s390/ -e s/parisc64/parisc/ \ + -e s/ppc64/powerpc64/ -e s/ppc/powerpc32/) + +CROSS := +ifneq ($(TARGET), $(HOST_ARCH)) +CROSS := -U__$(HOST_ARCH)__ +endif + +CFLAGS += -D__$(ARCH)__ $(CROSS) +CFLAGS_ARCH += -D__$(ARCH)__ $(CROSS) + +ifeq ($(ARCH), powerpc64) +CFLAGS += -m64 +CFLAGS_ARCH += -m64 +endif + +ifeq ($(ARCH), powerpc32) +CFLAGS += -m32 +CFLAGS_ARCH += -m32 +endif + +SRC_BASE = makedumpfile.c makedumpfile.h diskdump_mod.h sadump_mod.h sadump_info.h +SRC_PART = print_info.c dwarf_info.c elf_info.c erase_info.c sadump_info.c cache.c tools.c +OBJ_PART=$(patsubst %.c,%.o,$(SRC_PART)) +SRC_ARCH = arch/arm.c arch/arm64.c arch/x86.c arch/x86_64.c arch/ia64.c arch/ppc64.c arch/s390x.c arch/ppc.c arch/sparc64.c +OBJ_ARCH=$(patsubst %.c,%.o,$(SRC_ARCH)) + +LIBS = -ldw -lbz2 -lebl -ldl -lelf -lz +ifneq ($(LINKTYPE), dynamic) +LIBS := -static $(LIBS) +endif + +ifeq ($(USELZO), on) +LIBS := -llzo2 $(LIBS) +CFLAGS += -DUSELZO +endif + +ifeq ($(USESNAPPY), on) +LIBS := -lsnappy $(LIBS) +CFLAGS += -DUSESNAPPY +endif + +LIBS := -lpthread $(LIBS) 
+ +all: makedumpfile + +$(OBJ_PART): $(SRC_PART) + $(CC) $(CFLAGS) -c -o ./$@ $(VPATH)$(@:.o=.c) + +$(OBJ_ARCH): $(SRC_ARCH) + @mkdir -p $(@D) + $(CC) $(CFLAGS_ARCH) -c -o ./$@ $(VPATH)$(@:.o=.c) + +makedumpfile: $(SRC_BASE) $(OBJ_PART) $(OBJ_ARCH) + $(CC) $(CFLAGS) $(LDFLAGS) $(OBJ_PART) $(OBJ_ARCH) -rdynamic -o $@ $< $(LIBS) + echo .TH MAKEDUMPFILE 8 \"$(DATE)\" \"makedumpfile v$(VERSION)\" \"Linux System Administrator\'s Manual\" > temp.8 + grep -v "^.TH MAKEDUMPFILE 8" $(VPATH)makedumpfile.8 >> temp.8 + mv temp.8 makedumpfile.8 + gzip -c ./makedumpfile.8 > ./makedumpfile.8.gz + echo .TH MAKEDUMPFILE.CONF 5 \"$(DATE)\" \"makedumpfile v$(VERSION)\" \"Linux System Administrator\'s Manual\" > temp.5 + grep -v "^.TH MAKEDUMPFILE.CONF 5" $(VPATH)makedumpfile.conf.5 >> temp.5 + mv temp.5 makedumpfile.conf.5 + gzip -c ./makedumpfile.conf.5 > ./makedumpfile.conf.5.gz + +eppic_makedumpfile.so: extension_eppic.c + $(CC) $(CFLAGS) $(LDFLAGS) -shared -rdynamic -o $@ extension_eppic.c -fPIC -leppic -ltinfo + +clean: + rm -f $(OBJ) $(OBJ_PART) $(OBJ_ARCH) makedumpfile makedumpfile.8.gz makedumpfile.conf.5.gz + +install: + install -m 755 -d ${DESTDIR}/usr/sbin ${DESTDIR}/usr/share/man/man5 ${DESTDIR}/usr/share/man/man8 ${DESTDIR}/etc + install -m 755 -t ${DESTDIR}/usr/sbin makedumpfile $(VPATH)makedumpfile-R.pl + install -m 644 -t ${DESTDIR}/usr/share/man/man8 makedumpfile.8.gz + install -m 644 -t ${DESTDIR}/usr/share/man/man5 makedumpfile.conf.5.gz + install -m 644 -D $(VPATH)makedumpfile.conf ${DESTDIR}/etc/makedumpfile.conf.sample + mkdir -p ${DESTDIR}/usr/share/makedumpfile-${VERSION}/eppic_scripts + install -m 644 -t ${DESTDIR}/usr/share/makedumpfile-${VERSION}/eppic_scripts/ $(VPATH)eppic_scripts/* @@ -0,0 +1,202 @@ + +======================= + makedumpfile's README +======================= + +* REQUIREMENTS + Please download the following library file and install it exactly as below; + do NOT use "make install". 
+ - elfutils-0.142.tar.gz + The "make install" of elfutils installs some commands (ld, readelf, etc.), + and compiling problems sometimes happen due to using the installed + commands. To install only the library & header files, use the following + method: + # tar -zxvf elfutils-0.142.tar.gz + # cd elfutils-0.142 + # ./configure + # make + # + # mkdir /usr/local/include/elfutils/ + # cp ./libdw/libdw.h /usr/local/include/elfutils/libdw.h + # cp ./libdw/dwarf.h /usr/local/include/dwarf.h + # cp ./libelf/libelf.h /usr/local/include/libelf.h + # cp ./libelf/gelf.h /usr/local/include/gelf.h + # + # cp ./libelf/libelf.a /usr/local/lib/libelf.a + # cp ./libdw/libdw.a /usr/local/lib/libdw.a + # cp ./libasm/libasm.a /usr/local/lib/libasm.a + # cp ./libebl/libebl.a /usr/local/lib/libebl.a + # + +* BUILD & INSTALL + 1.Get the latest makedumpfile from the following site: + https://sourceforge.net/projects/makedumpfile/ + 2.Uncompress the tar file: + # tar -zxvf makedumpfile-x.y.z.tar.gz + 3.Enter the makedumpfile subdirectory: + # cd makedumpfile-x.y.z + 4.Build, and install: + # make; make install + 5.Build for a different architecture than the host : + # make TARGET=<arch> ; make install + where <arch> is the 'uname -m' of the target architecture. + The user has to set the environment variable CC to appropriate + compiler for the target architecture. + 6.Build with lzo support: + # make USELZO=on ; make install + The user has to prepare lzo library. + 7.Build with snappy support: + # make USESNAPPY=on ; make install + The user has to prepare snappy library. + 8.Build the extension module for --eppic option. + # make eppic_makedumpfile.so + The user has to prepare eppic library from the following site: + http://code.google.com/p/eppic/ + +* SUPPORTED KERNELS + This makedumpfile supports the following kernels. 
+ + | FLATMEM | DISCONTIGMEM | SPARSEMEM + |-------------------+-------------------+------------------------ + Kernel| | x86| PPC| PPC| | x86| | PPC| | x86| | PPC| + Version| x86| _64| 32| 64| x86| _64|ia64| 64| x86| _64|ia64| 64|s390 + -------+----+----+----+----+----+----+----+----+----+----+----+----+---- + 2.6.15 | OK | -- | | -- | -- | -- | -- | -- | -- | -- | -- | -- | + 2.6.16 | OK | OK | | | -- | OK | OK | -- | -- | | -- | | + 2.6.17 | OK | OK | | | -- | OK | -- | -- | -- | OK | -- | | + 2.6.18 | OK | OK | | OK | -- | OK | OK | -- | -- | OK | OK | OK | + 2.6.19 | OK | OK | | OK | OK | OK | | -- | OK | OK | OK | OK | + 2.6.20 | OK | OK | | #1 | OK | OK | OK | -- | OK | OK | OK | #1 | + 21-rc5 | OK | OK | | OK | OK | OK | OK | -- | OK | OK | OK | OK | + 2.6.21 | OK | OK | | | OK | OK | OK | -- | OK | OK | OK | | + 2.6.22 | OK | OK | | | OK | OK | OK | -- | OK | OK | OK | | + 2.6.23 | OK | OK | | | OK | OK | OK | -- | OK | OK | OK | | + 2.6.24 | OK | OK | | | OK | OK | OK | -- | OK | OK | OK | | + 2.6.25 | OK | ** | | | | ** | OK | -- | OK | OK | OK | | + 2.6.26 | OK | ** | | | | ** | OK | -- | OK | OK | OK | | + 2.6.27 | OK | ** | | | | ** | OK | -- | #2 | OK | OK | | + 2.6.28 | OK | ** | | | | ** | OK | -- | OK | OK | OK | | + 2.6.29 | OK | ** | | | | ** | OK | -- | OK | OK | OK | | + 2.6.30 | OK | ** | | | | ** | OK | -- | OK | OK | OK | | + 2.6.31 | OK | ** | | | | ** | | -- | OK | OK | OK | | + 2.6.32 | OK | ** | | | | ** | | -- | OK | OK | | | OK + 2.6.33 | OK | ** | | | | ** | | -- | OK | OK | | | + 2.6.34 | OK | ** | | | | ** | | -- | OK | OK | | | + 2.6.35 | OK | ** | | | | ** | | -- | OK | OK | | | + 2.6.36 | OK | ** | | | | ** | | -- | OK | OK | | | + 2.6.37 | OK | ** | | | | ** | | -- | OK | OK | | | + 2.6.38 | OK | ** | | | | ** | | -- | OK | OK | | | + 2.6.39 | OK | ** | | | | ** | | -- | OK | OK | | | + 3.0 | OK | ** | | | | ** | | -- | OK | OK | | | + 3.1 | OK | ** | | | | ** | | -- | OK | OK | | | + 3.2 | OK | ** | OK | | | ** | | -- | OK | OK 
| | | + 3.3 | OK | ** | | | | ** | | -- | OK | OK | | | + 3.4 | OK | ** | | | | ** | | -- | OK | OK | | | + 3.5 | OK | ** | | | | ** | | -- | OK | OK | | | + 3.6 | OK | ** | | | | ** | | -- | OK | OK | | | + 3.7 | OK | ** | | | | ** | | -- | OK | OK | | | + 3.8 | OK | ** | | | | ** | | -- | OK | OK | | | + 3.9 | OK | ** | | | | ** | | -- | OK | OK | | | + 3.10 | OK | ** | | | | ** | | -- | OK | OK | | | + 3.11 | OK | ** | | | | ** | | -- | OK | OK | | | + 3.12 | OK | ** | | | | ** | | -- | OK | OK | | | + 3.13 | OK | ** | | | | ** | | -- | OK | OK | | | + 3.14 | OK | ** | | | | ** | | -- | OK | OK | | | + 3.15 | OK | ** | | | | ** | | -- | OK | OK | | | + 3.16 | OK | ** | | | | ** | | -- | OK | OK | | | + 3.17 | OK | ** | | | | ** | | -- | OK | OK | | | + 3.18 | OK | ** | | | | ** | | -- | OK | OK | | | + 3.19 | OK | ** | | | | ** | | -- | OK | OK | | | + 4.0 | OK | ** | | | | ** | | -- | OK | OK | | | + 4.1 | OK | ** | | | | ** | | -- | OK | OK | | | + 4.2 | OK | ** | | | | ** | | -- | OK | OK | | | + 4.3 | OK | ** | | | | ** | | -- | OK | OK | | | + 4.4 | OK | ** | | | | ** | | -- | OK | OK | | | + 4.5 | OK | ** | | | | ** | | -- | OK | OK | | | + 4.6 | OK | ** | | | | ** | | -- | OK | OK | | | + 4.7 | OK | ** | | | | ** | | -- | OK | OK | | | + 4.8 | OK | ** | | | | ** | | -- | OK | OK | | | + 4.9 | OK | ** | | | | ** | | -- | OK | OK | | | + 4.10 | OK | ** | | | | ** | | -- | OK | OK | | | + 4.11 | OK | ** | | | | ** | | -- | OK | OK | | | + 4.12 | OK | ** | | | | ** | | -- | OK | OK | | | + 4.13 | OK | ** | | | | ** | | -- | OK | OK | | | + 4.14 | OK | ** | | | | ** | | -- | OK | OK | | | + 4.15 | OK | ** | | | | ** | | -- | OK | OK | | | + 4.16 | OK | ** | | | | ** | | -- | OK | OK | | | + 4.17 | OK | ** | | | | ** | | -- | OK | OK | | | + + OK : Support. + -- : Not support. + Empty : Not test yet. + TODO : TODO. + #1 : Both kexec/kdump does not work with PPC64 2.6.20 kernels. + This is a known problem fixed with later kernels. 
+ #2 : kdump does not work with i386 sparsemem 2.6.27 kernels. + This is a known problem fixed with later kernels. + ** : The deleted memory model. + On x86_64, both FLATMEM and DISCONTIGMEM have been deleted since + linux-2.6.25. + +* USAGE + Please see "man makedumpfile" or "makedumpfile -h". + +* TODO + 1. Supporting more kernels. + 2. Fixing the report message. + - Now, it sometimes counts the number of free_pages duplicating zero-pages + if creating an ELF dumpfile. + - The cyclic mode will show incorrect and many progress indicator. + +* NOTE + 1. A vmcoreinfo file should be generated by the makedumpfile which is used + for dump filtering. If installing the latest makedumpfile, the vmcoreinfo + file should be regenerated by it. Actually, makedumpfile v1.2.0 or later + cannot work by vmcoreinfo file which is generated by v1.1.9 or before. + + 2. If using linux-2.6.24 and kexec-tools-testing-20061214, /proc/vmcore + contains vmcoreinfo data. So it is possible to create a dumpfile without + -x option (or -i option) like the following: + # makedumpfile -d 31 /proc/vmcore dumpfile + + 3. On makedumpfile-1.2.5 or before, both '--xen-syms' and '--xen-vmcoreinfo' + excluded Xen user domain pages. Since makedumpfile-1.2.6, '-X' option + have been added for excluding Xen user domain pages, and these options + does not exclude Xen user domain pages. So user should specify '-X' + option for excluding Xen user domain pages. + +* FAQ + 001: If installing elfutils-0.137 into older elfutils by the above way, the + following problem happens sometimes. If seeing, try to enable LDFLAGS + comment (-L/usr/local/lib -I/usr/local/include) in Makefile. + + $ make + [..] 
+ /tmp/ccXQtvnZ.o: In function `process_module': + /makedumpfile/makedumpfile.c:1387: undefined reference to `dwarf_getelf' + collect2: ld returned 1 exit status + make: *** [makedumpfile] Error 1 + + 002: makedumpfile is compiled with -static option, because the command should + run while the second kernel, which may not mount root filesystem and may + not contain dynamic library files, is running. + If the dynamic library files, which are needed by makedumpfile, exist + while the second kernel is running, we can use dynamic libraries by + "LINKTYPE" parameter. + + $ make LINKTYPE=dynamic + + This is a workaround for some linux distributions which does + not contain static library files needed by makedumpfile. + + +* REFERENCES + https://sourceforge.net/projects/makedumpfile/ + http://lists.infradead.org/pipermail/kexec/ + https://lists.linux-foundation.org/pipermail/fastboot/ + +* BUG REPORT + If finding some bugs, please send the information to the following: + Kazuhito Hagio <k-hagio@ab.jp.nec.com> + kexec-ml <kexec@lists.infradead.org> + diff --git a/arch/arm.c b/arch/arm.c new file mode 100644 index 0000000..af7442a --- /dev/null +++ b/arch/arm.c @@ -0,0 +1,169 @@ +/* + * arm.c + * + * Created by: Mika Westerberg <ext-mika.1.westerberg@nokia.com> + * Copyright (C) 2010 Nokia Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ +#ifdef __arm__ + +#include "../print_info.h" +#include "../elf_info.h" +#include "../makedumpfile.h" + +#define PMD_TYPE_MASK 3 +#define PMD_TYPE_SECT 2 +#define PMD_TYPE_TABLE 1 + +#define pgd_index(vaddr) ((vaddr) >> PGDIR_SHIFT) +#define pte_index(vaddr) ((vaddr >> PAGESHIFT()) & (PTRS_PER_PTE - 1)) + +#define pgd_offset(pgdir, vaddr) \ + ((pgdir) + pgd_index(vaddr) * 2 * sizeof(unsigned long)) +#define pmd_offset(dir, vaddr) (dir) +#define pte_offset(pmd, vaddr) \ + (pmd_page_vaddr(pmd) + pte_index(vaddr) * sizeof(unsigned long)) + +/* + * These only work for kernel directly mapped addresses. + */ +#define __va(paddr) ((paddr) - info->phys_base + info->page_offset) +#define __pa(vaddr) ((vaddr) - info->page_offset + info->phys_base) + +static inline unsigned long +pmd_page_vaddr(unsigned long pmd) +{ + unsigned long ptr; + + ptr = pmd & ~(PTRS_PER_PTE * sizeof(void *) - 1); + ptr += PTRS_PER_PTE * sizeof(void *); + + return __va(ptr); +} + +int +get_phys_base_arm(void) +{ + unsigned long phys_base = ULONG_MAX; + unsigned long long phys_start; + int i; + + /* + * We resolve phys_base from PT_LOAD segments. LMA contains physical + * address of the segment, and we use the first one. 
+ */ + for (i = 0; get_pt_load(i, &phys_start, NULL, NULL, NULL); i++) { + if (phys_start < phys_base) + phys_base = phys_start; + } + + if (phys_base == ULONG_MAX) { + ERRMSG("Can't determine phys_base.\n"); + return FALSE; + } + + info->phys_base = phys_base; + DEBUG_MSG("phys_base : %lx\n", phys_base); + + return TRUE; +} + +int +get_machdep_info_arm(void) +{ + info->page_offset = SYMBOL(_stext) & 0xffff0000UL; + info->max_physmem_bits = _MAX_PHYSMEM_BITS; + info->kernel_start = SYMBOL(_stext); + info->section_size_bits = _SECTION_SIZE_BITS; + + DEBUG_MSG("page_offset : %lx\n", info->page_offset); + DEBUG_MSG("kernel_start : %lx\n", info->kernel_start); + + return TRUE; +} + +/* + * vtop_arm() - translate arbitrary virtual address to physical + * @vaddr: virtual address to translate + * + * Function translates @vaddr into physical address using page tables. This + * address can be any virtual address. Returns physical address of the + * corresponding virtual address or %NOT_PADDR when there is no translation. 
+ */ +static unsigned long long +vtop_arm(unsigned long vaddr) +{ + unsigned long long paddr = NOT_PADDR; + unsigned long ptr, pgd, pte, pmd; + + if (SYMBOL(swapper_pg_dir) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get the symbol of swapper_pg_dir.\n"); + return NOT_PADDR; + } + + ptr = pgd_offset(SYMBOL(swapper_pg_dir), vaddr); + if (!readmem(VADDR, ptr, &pgd, sizeof(pmd))) { + ERRMSG("Can't read pgd\n"); + return NOT_PADDR; + } + + if (info->vaddr_for_vtop == vaddr) + MSG(" PGD : %08lx => %08lx\n", ptr, pgd); + + pmd = pmd_offset(pgd, vaddr); + + switch (pmd & PMD_TYPE_MASK) { + case PMD_TYPE_TABLE: { + /* 4k small page */ + ptr = pte_offset(pmd, vaddr); + if (!readmem(VADDR, ptr, &pte, sizeof(pte))) { + ERRMSG("Can't read pte\n"); + return NOT_PADDR; + } + + if (info->vaddr_for_vtop == vaddr) + MSG(" PTE : %08lx => %08lx\n", ptr, pte); + + if (!(pte & _PAGE_PRESENT)) { + ERRMSG("Can't get a valid pte.\n"); + return NOT_PADDR; + } + + paddr = PAGEBASE(pte) + (vaddr & (PAGESIZE() - 1)); + break; + } + + case PMD_TYPE_SECT: + /* 1MB section */ + pte = pmd & PMD_MASK; + paddr = pte + (vaddr & (PMD_SIZE - 1)); + break; + } + + return paddr; +} + +unsigned long long +vaddr_to_paddr_arm(unsigned long vaddr) +{ + /* + * Only use translation tables when user has explicitly requested us to + * perform translation for a given address. Otherwise we assume that the + * translation is done within the kernel direct mapped region. 
+ */ + if (info->vaddr_for_vtop == vaddr) + return vtop_arm(vaddr); + + return __pa(vaddr); +} + +#endif /* __arm__ */ diff --git a/arch/arm64.c b/arch/arm64.c new file mode 100644 index 0000000..2fd3e18 --- /dev/null +++ b/arch/arm64.c @@ -0,0 +1,378 @@ +/* + * arch/arm64.c : Based on arch/arm.c + * + * Copyright (C) 2015 Red Hat, Pratyush Anand <panand@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifdef __aarch64__ + +#include "../elf_info.h" +#include "../makedumpfile.h" +#include "../print_info.h" + +typedef struct { + unsigned long pgd; +} pgd_t; + +typedef struct { + pgd_t pgd; +} pud_t; + +typedef struct { + pud_t pud; +} pmd_t; + +typedef struct { + unsigned long pte; +} pte_t; + +static int pgtable_level; +static int va_bits; +static unsigned long kimage_voffset; + +#define SZ_4K (4 * 1024) +#define SZ_16K (16 * 1024) +#define SZ_64K (64 * 1024) +#define SZ_128M (128 * 1024 * 1024) + +#define PAGE_OFFSET_36 ((0xffffffffffffffffUL) << 36) +#define PAGE_OFFSET_39 ((0xffffffffffffffffUL) << 39) +#define PAGE_OFFSET_42 ((0xffffffffffffffffUL) << 42) +#define PAGE_OFFSET_47 ((0xffffffffffffffffUL) << 47) +#define PAGE_OFFSET_48 ((0xffffffffffffffffUL) << 48) + +#define pgd_val(x) ((x).pgd) +#define pud_val(x) (pgd_val((x).pgd)) +#define pmd_val(x) (pud_val((x).pud)) +#define pte_val(x) ((x).pte) + +#define PAGE_MASK (~(PAGESIZE() - 1)) +#define PGDIR_SHIFT ((PAGESHIFT() - 3) * pgtable_level + 3) +#define PTRS_PER_PGD (1 << (va_bits - PGDIR_SHIFT)) +#define PUD_SHIFT get_pud_shift_arm64() +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE - 1)) +#define PTRS_PER_PTE (1 << (PAGESHIFT() - 3)) +#define PTRS_PER_PUD PTRS_PER_PTE +#define PMD_SHIFT ((PAGESHIFT() - 3) * 2 + 3) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE - 1)) +#define PTRS_PER_PMD PTRS_PER_PTE + +#define PAGE_PRESENT (1 << 0) +#define SECTIONS_SIZE_BITS 30 +/* Highest possible physical address supported */ +#define PHYS_MASK_SHIFT 48 +#define PHYS_MASK ((1UL << PHYS_MASK_SHIFT) - 1) +/* + * Remove the highest order bits that are not a part of the + * physical address in a section + */ +#define PMD_SECTION_MASK ((1UL << 40) - 1) + +#define PMD_TYPE_MASK 3 +#define PMD_TYPE_SECT 1 +#define PMD_TYPE_TABLE 3 + +#define PUD_TYPE_MASK 3 +#define PUD_TYPE_SECT 1 +#define PUD_TYPE_TABLE 3 + +#define pgd_index(vaddr) (((vaddr) >> PGDIR_SHIFT) & (PTRS_PER_PGD 
- 1)) +#define pgd_offset(pgdir, vaddr) ((pgd_t *)(pgdir) + pgd_index(vaddr)) + +#define pte_index(vaddr) (((vaddr) >> PAGESHIFT()) & (PTRS_PER_PTE - 1)) +#define pmd_page_paddr(pmd) (pmd_val(pmd) & PHYS_MASK & (int32_t)PAGE_MASK) +#define pte_offset(dir, vaddr) ((pte_t*)pmd_page_paddr((*dir)) + pte_index(vaddr)) + +#define pmd_index(vaddr) (((vaddr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) +#define pud_page_paddr(pud) (pud_val(pud) & PHYS_MASK & (int32_t)PAGE_MASK) +#define pmd_offset_pgtbl_lvl_2(pud, vaddr) ((pmd_t *)pud) +#define pmd_offset_pgtbl_lvl_3(pud, vaddr) ((pmd_t *)pud_page_paddr((*pud)) + pmd_index(vaddr)) + +#define pud_index(vaddr) (((vaddr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) +#define pgd_page_paddr(pgd) (pgd_val(pgd) & PHYS_MASK & (int32_t)PAGE_MASK) + +static unsigned long long +__pa(unsigned long vaddr) +{ + if (kimage_voffset == NOT_FOUND_NUMBER || + (vaddr >= PAGE_OFFSET)) + return (vaddr - PAGE_OFFSET + info->phys_base); + else + return (vaddr - kimage_voffset); +} + +static int +get_pud_shift_arm64(void) +{ + if (pgtable_level == 4) + return ((PAGESHIFT() - 3) * 3 + 3); + else + return PGDIR_SHIFT; +} + +static pmd_t * +pmd_offset(pud_t *puda, pud_t *pudv, unsigned long vaddr) +{ + if (pgtable_level == 2) { + return pmd_offset_pgtbl_lvl_2(puda, vaddr); + } else { + return pmd_offset_pgtbl_lvl_3(pudv, vaddr); + } +} + +static pud_t * +pud_offset(pgd_t *pgda, pgd_t *pgdv, unsigned long vaddr) +{ + if (pgtable_level == 4) + return ((pud_t *)pgd_page_paddr((*pgdv)) + pud_index(vaddr)); + else + return (pud_t *)(pgda); +} + +static int calculate_plat_config(void) +{ + /* derive pgtable_level as per arch/arm64/Kconfig */ + if ((PAGESIZE() == SZ_16K && va_bits == 36) || + (PAGESIZE() == SZ_64K && va_bits == 42)) { + pgtable_level = 2; + } else if ((PAGESIZE() == SZ_64K && va_bits == 48) || + (PAGESIZE() == SZ_4K && va_bits == 39) || + (PAGESIZE() == SZ_16K && va_bits == 47)) { + pgtable_level = 3; + } else if ((PAGESIZE() != SZ_64K && va_bits == 48)) { + 
pgtable_level = 4; + } else { + ERRMSG("PAGE SIZE %#lx and VA Bits %d not supported\n", + PAGESIZE(), va_bits); + return FALSE; + } + + return TRUE; +} + +unsigned long +get_kvbase_arm64(void) +{ + return (0xffffffffffffffffUL << va_bits); +} + +int +get_phys_base_arm64(void) +{ + info->phys_base = NUMBER(PHYS_OFFSET); + + DEBUG_MSG("phys_base : %lx\n", info->phys_base); + + return TRUE; +} + +ulong +get_stext_symbol(void) +{ + int found; + FILE *fp; + char buf[BUFSIZE]; + char *kallsyms[MAXARGS]; + ulong kallsym; + + if (!file_exists("/proc/kallsyms")) { + ERRMSG("(%s) does not exist, will not be able to read symbols. %s\n", + "/proc/kallsyms", strerror(errno)); + return FALSE; + } + + if ((fp = fopen("/proc/kallsyms", "r")) == NULL) { + ERRMSG("Cannot open (%s) to read symbols. %s\n", + "/proc/kallsyms", strerror(errno)); + return FALSE; + } + + found = FALSE; + kallsym = 0; + + while (!found && fgets(buf, BUFSIZE, fp) && + (parse_line(buf, kallsyms) == 3)) { + if (hexadecimal(kallsyms[0], 0) && + STREQ(kallsyms[2], "_stext")) { + kallsym = htol(kallsyms[0], 0); + found = TRUE; + break; + } + } + fclose(fp); + + return(found ? kallsym : FALSE); +} + +int +get_machdep_info_arm64(void) +{ + /* Check if va_bits is still not initialized. If still 0, call + * get_versiondep_info() to initialize the same. 
+ */ + if (!va_bits) + get_versiondep_info_arm64(); + + if (!calculate_plat_config()) { + ERRMSG("Can't determine platform config values\n"); + return FALSE; + } + + kimage_voffset = NUMBER(kimage_voffset); + info->max_physmem_bits = PHYS_MASK_SHIFT; + info->section_size_bits = SECTIONS_SIZE_BITS; + + DEBUG_MSG("kimage_voffset : %lx\n", kimage_voffset); + DEBUG_MSG("max_physmem_bits : %lx\n", info->max_physmem_bits); + DEBUG_MSG("section_size_bits: %lx\n", info->section_size_bits); + + return TRUE; +} + +unsigned long long +kvtop_xen_arm64(unsigned long kvaddr) +{ + return ERROR; +} + +int +get_xen_basic_info_arm64(void) +{ + return ERROR; +} + +int +get_xen_info_arm64(void) +{ + return ERROR; +} + +int +get_versiondep_info_arm64(void) +{ + ulong _stext; + + _stext = get_stext_symbol(); + if (!_stext) { + ERRMSG("Can't get the symbol of _stext.\n"); + return FALSE; + } + + /* Derive va_bits as per arch/arm64/Kconfig */ + if ((_stext & PAGE_OFFSET_36) == PAGE_OFFSET_36) { + va_bits = 36; + } else if ((_stext & PAGE_OFFSET_39) == PAGE_OFFSET_39) { + va_bits = 39; + } else if ((_stext & PAGE_OFFSET_42) == PAGE_OFFSET_42) { + va_bits = 42; + } else if ((_stext & PAGE_OFFSET_47) == PAGE_OFFSET_47) { + va_bits = 47; + } else if ((_stext & PAGE_OFFSET_48) == PAGE_OFFSET_48) { + va_bits = 48; + } else { + ERRMSG("Cannot find a proper _stext for calculating VA_BITS\n"); + return FALSE; + } + + info->page_offset = (0xffffffffffffffffUL) << (va_bits - 1); + + DEBUG_MSG("page_offset=%lx, va_bits=%d\n", info->page_offset, + va_bits); + + return TRUE; +} + +/* + * vaddr_to_paddr_arm64() - translate arbitrary virtual address to physical + * @vaddr: virtual address to translate + * + * Function translates @vaddr into physical address using page tables. This + * address can be any virtual address. Returns physical address of the + * corresponding virtual address or %NOT_PADDR when there is no translation. 
+ */ +unsigned long long +vaddr_to_paddr_arm64(unsigned long vaddr) +{ + unsigned long long paddr = NOT_PADDR; + unsigned long long swapper_phys; + pgd_t *pgda, pgdv; + pud_t *puda, pudv; + pmd_t *pmda, pmdv; + pte_t *ptea, ptev; + + if (SYMBOL(swapper_pg_dir) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get the symbol of swapper_pg_dir.\n"); + return NOT_PADDR; + } + + swapper_phys = __pa(SYMBOL(swapper_pg_dir)); + + pgda = pgd_offset(swapper_phys, vaddr); + if (!readmem(PADDR, (unsigned long long)pgda, &pgdv, sizeof(pgdv))) { + ERRMSG("Can't read pgd\n"); + return NOT_PADDR; + } + + puda = pud_offset(pgda, &pgdv, vaddr); + if (!readmem(PADDR, (unsigned long long)puda, &pudv, sizeof(pudv))) { + ERRMSG("Can't read pud\n"); + return NOT_PADDR; + } + + if ((pud_val(pudv) & PUD_TYPE_MASK) == PUD_TYPE_SECT) { + /* 1GB section for Page Table level = 4 and Page Size = 4KB */ + paddr = (pud_val(pudv) & (PUD_MASK & PMD_SECTION_MASK)) + + (vaddr & (PUD_SIZE - 1)); + return paddr; + } + + pmda = pmd_offset(puda, &pudv, vaddr); + if (!readmem(PADDR, (unsigned long long)pmda, &pmdv, sizeof(pmdv))) { + ERRMSG("Can't read pmd\n"); + return NOT_PADDR; + } + + switch (pmd_val(pmdv) & PMD_TYPE_MASK) { + case PMD_TYPE_TABLE: + ptea = pte_offset(&pmdv, vaddr); + /* 64k page */ + if (!readmem(PADDR, (unsigned long long)ptea, &ptev, sizeof(ptev))) { + ERRMSG("Can't read pte\n"); + return NOT_PADDR; + } + + if (!(pte_val(ptev) & PAGE_PRESENT)) { + ERRMSG("Can't get a valid pte.\n"); + return NOT_PADDR; + } else { + + paddr = (PAGEBASE(pte_val(ptev)) & PHYS_MASK) + + (vaddr & (PAGESIZE() - 1)); + } + break; + case PMD_TYPE_SECT: + /* 512MB section for Page Table level = 3 and Page Size = 64KB*/ + paddr = (pmd_val(pmdv) & (PMD_MASK & PMD_SECTION_MASK)) + + (vaddr & (PMD_SIZE - 1)); + break; + } + + return paddr; +} + +#endif /* __aarch64__ */ diff --git a/arch/ia64.c b/arch/ia64.c new file mode 100644 index 0000000..6c33cc7 --- /dev/null +++ b/arch/ia64.c @@ -0,0 +1,391 @@ +/* + * ia64.c + * + 
* Copyright (C) 2006, 2007, 2008 NEC Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#ifdef __ia64__ + +#include "../print_info.h" +#include "../elf_info.h" +#include "../makedumpfile.h" + + +/* + * vmalloc() starting address is either the traditional 0xa000000000000000 or + * bumped up in 2.6 to 0xa000000200000000. + */ +int +is_vmalloc_addr_ia64(unsigned long vaddr) +{ + return ((vaddr >= info->vmalloc_start) && + (vaddr < (unsigned long)KERNEL_UNCACHED_BASE)); +} + +int +get_phys_base_ia64(void) +{ + int i; + unsigned long long phys_start; + unsigned long long virt_start; + + /* + * Default to 64MB. + */ + info->phys_base = DEFAULT_PHYS_START; + + for (i = 0; get_pt_load(i, &phys_start, NULL, &virt_start, NULL); i++) { + if (VADDR_REGION(virt_start) == KERNEL_VMALLOC_REGION) { + + info->phys_base = phys_start; + break; + } + } + return TRUE; +} + +int +get_machdep_info_ia64(void) +{ + /* + * Get kernel_start and vmalloc_start. + */ + if (SYMBOL(_stext) == NOT_FOUND_SYMBOL) + return FALSE; + + info->kernel_start = SYMBOL(_stext); + + if (VADDR_REGION(info->kernel_start) == KERNEL_VMALLOC_REGION) + info->vmalloc_start = info->kernel_start + 4*1024UL*1024UL*1024UL; + else + info->vmalloc_start = KERNEL_VMALLOC_BASE; + + /* + * Check the pgtable (3 Levels or 4 Levels). 
+ */ + if ((vt.mem_flags & MEMORY_PAGETABLE_4L) + || !strncmp(SRCFILE(pud_t), STR_PUD_T_4L, strlen(STR_PUD_T_4L))) { + vt.mem_flags |= MEMORY_PAGETABLE_4L; + DEBUG_MSG("PAGETABLE_4L : ON\n"); + } else if ((vt.mem_flags & MEMORY_PAGETABLE_3L) + || !strncmp(SRCFILE(pud_t), STR_PUD_T_3L, strlen(STR_PUD_T_3L))) { + vt.mem_flags |= MEMORY_PAGETABLE_3L; + DEBUG_MSG("PAGETABLE_3L : ON\n"); + } else { + MSG("Can't distinguish the pgtable.\n"); + } + + info->section_size_bits = _SECTION_SIZE_BITS; + info->max_physmem_bits = _MAX_PHYSMEM_BITS; + + return TRUE; +} + +/* + * Translate a virtual address to a physical address by using 3 levels paging. + */ +unsigned long long +vtop3_ia64(unsigned long vaddr) +{ + unsigned long long paddr, temp, page_dir, pgd_pte, page_middle, pmd_pte; + unsigned long long page_table, pte; + + if (SYMBOL(swapper_pg_dir) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get the symbol of swapper_pg_dir.\n"); + return NOT_PADDR; + } + + /* + * Get PGD + */ + temp = vaddr & MASK_PGD_3L; + temp = temp >> (PGDIR_SHIFT_3L - 3); + page_dir = SYMBOL(swapper_pg_dir) + temp; + if (!readmem(VADDR, page_dir, &pgd_pte, sizeof pgd_pte)) { + ERRMSG("Can't get pgd_pte (page_dir:%llx).\n", page_dir); + return NOT_PADDR; + } + if (info->vaddr_for_vtop == vaddr) + MSG(" PGD : %16llx => %16llx\n", page_dir, pgd_pte); + + /* + * Get PMD + */ + temp = vaddr & MASK_PMD; + temp = temp >> (PMD_SHIFT - 3); + page_middle = pgd_pte + temp; + if (!readmem(PADDR, page_middle, &pmd_pte, sizeof pmd_pte)) { + ERRMSG("Can't get pmd_pte (page_middle:%llx).\n", page_middle); + return NOT_PADDR; + } + if (info->vaddr_for_vtop == vaddr) + MSG(" PMD : %16llx => %16llx\n", page_middle, pmd_pte); + + /* + * Get PTE + */ + temp = vaddr & MASK_PTE; + temp = temp >> (PAGESHIFT() - 3); + page_table = pmd_pte + temp; + if (!readmem(PADDR, page_table, &pte, sizeof pte)) { + ERRMSG("Can't get pte (page_table:%llx).\n", page_table); + return NOT_PADDR; + } + if (info->vaddr_for_vtop == vaddr) + MSG(" PTE 
: %16llx => %16llx\n", page_table, pte); + + /* + * Get physical address + */ + temp = vaddr & MASK_POFFSET; + paddr = (pte & _PAGE_PPN_MASK) + temp; + + return paddr; +} + +/* + * Translate a virtual address to a physical address by using 4 levels paging. + */ +unsigned long long +vtop4_ia64(unsigned long vaddr) +{ + unsigned long long paddr, temp, page_dir, pgd_pte, page_upper, pud_pte; + unsigned long long page_middle, pmd_pte, page_table, pte; + + if (SYMBOL(swapper_pg_dir) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get the symbol of swapper_pg_dir.\n"); + return NOT_PADDR; + } + + /* + * Get PGD + */ + temp = vaddr & MASK_PGD_4L; + temp = temp >> (PGDIR_SHIFT_4L - 3); + page_dir = SYMBOL(swapper_pg_dir) + temp; + if (!readmem(VADDR, page_dir, &pgd_pte, sizeof pgd_pte)) { + ERRMSG("Can't get pgd_pte (page_dir:%llx).\n", page_dir); + return NOT_PADDR; + } + if (info->vaddr_for_vtop == vaddr) + MSG(" PGD : %16llx => %16llx\n", page_dir, pgd_pte); + + /* + * Get PUD + */ + temp = vaddr & MASK_PUD; + temp = temp >> (PUD_SHIFT - 3); + page_upper = pgd_pte + temp; + if (!readmem(PADDR, page_upper, &pud_pte, sizeof pud_pte)) { + ERRMSG("Can't get pud_pte (page_upper:%llx).\n", page_upper); + return NOT_PADDR; + } + if (info->vaddr_for_vtop == vaddr) + MSG(" PUD : %16llx => %16llx\n", page_upper, pud_pte); + + /* + * Get PMD + */ + temp = vaddr & MASK_PMD; + temp = temp >> (PMD_SHIFT - 3); + page_middle = pud_pte + temp; + if (!readmem(PADDR, page_middle, &pmd_pte, sizeof pmd_pte)) { + ERRMSG("Can't get pmd_pte (page_middle:%llx).\n", page_middle); + return NOT_PADDR; + } + if (info->vaddr_for_vtop == vaddr) + MSG(" PMD : %16llx => %16llx\n", page_middle, pmd_pte); + + /* + * Get PTE + */ + temp = vaddr & MASK_PTE; + temp = temp >> (PAGESHIFT() - 3); + page_table = pmd_pte + temp; + if (!readmem(PADDR, page_table, &pte, sizeof pte)) { + ERRMSG("Can't get pte (page_table:%llx).\n", page_table); + return NOT_PADDR; + } + if (info->vaddr_for_vtop == vaddr) + MSG(" PTE : 
%16llx => %16llx\n", page_table, pte); + + /* + * Get physical address + */ + temp = vaddr & MASK_POFFSET; + paddr = (pte & _PAGE_PPN_MASK) + temp; + + return paddr; +} + +unsigned long long +vtop_ia64(unsigned long vaddr) +{ + unsigned long long paddr; + + if (VADDR_REGION(vaddr) != KERNEL_VMALLOC_REGION) { + ERRMSG("vaddr(%lx) is not KERNEL_VMALLOC_REGION.\n", vaddr); + return NOT_PADDR; + } + paddr = vaddr_to_paddr_general(vaddr); + if (paddr != NOT_PADDR) + return paddr; + + if (!is_vmalloc_addr_ia64(vaddr)) { + paddr = vaddr - info->kernel_start + + (info->phys_base & KERNEL_TR_PAGE_MASK); + if (is_xen_memory()) + paddr = ptom_xen(paddr); + return paddr; + } + + if (vt.mem_flags & MEMORY_PAGETABLE_4L) + return vtop4_ia64(vaddr); + else + return vtop3_ia64(vaddr); +} + +/* + * Translate a virtual address to physical address. + */ +unsigned long long +vaddr_to_paddr_ia64(unsigned long vaddr) +{ + unsigned long long paddr; + + switch (VADDR_REGION(vaddr)) { + case KERNEL_CACHED_REGION: + paddr = vaddr - (ulong)(KERNEL_CACHED_BASE); + break; + + case KERNEL_UNCACHED_REGION: + paddr = vaddr - (ulong)(KERNEL_UNCACHED_BASE); + break; + + case KERNEL_VMALLOC_REGION: + paddr = vtop_ia64(vaddr); + break; + + default: + ERRMSG("Unknown region (%ld)\n", VADDR_REGION(vaddr)); + return 0x0; + } + return paddr; +} + +/* + * for Xen extraction + */ +unsigned long long +kvtop_xen_ia64(unsigned long kvaddr) +{ + unsigned long long addr, dirp, entry; + + if (!is_xen_vaddr(kvaddr)) + return NOT_PADDR; + + if (is_direct(kvaddr)) + return (unsigned long)kvaddr - DIRECTMAP_VIRT_START; + + if (!is_frame_table_vaddr(kvaddr)) + return NOT_PADDR; + + addr = kvaddr - VIRT_FRAME_TABLE_ADDR; + + dirp = SYMBOL(frametable_pg_dir) - DIRECTMAP_VIRT_START; + dirp += ((addr >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) * sizeof(unsigned long long); + if (!readmem(PADDR, dirp, &entry, sizeof(entry))) + return NOT_PADDR; + + dirp = entry & _PFN_MASK; + if (!dirp) + return NOT_PADDR; + + dirp += ((addr >> 
PMD_SHIFT) & (PTRS_PER_PMD - 1)) * sizeof(unsigned long long); + if (!readmem(PADDR, dirp, &entry, sizeof(entry))) + return NOT_PADDR; + + dirp = entry & _PFN_MASK; + if (!dirp) + return NOT_PADDR; + + dirp += ((addr >> PAGESHIFT()) & (PTRS_PER_PTE - 1)) * sizeof(unsigned long long); + if (!readmem(PADDR, dirp, &entry, sizeof(entry))) + return NOT_PADDR; + + if (!(entry & _PAGE_P)) + return NOT_PADDR; + + entry = (entry & _PFN_MASK) + (addr & ((1UL << PAGESHIFT()) - 1)); + + return entry; +} + +int +get_xen_basic_info_ia64(void) +{ + unsigned long xen_start, xen_end; + + info->frame_table_vaddr = VIRT_FRAME_TABLE_ADDR; /* "frame_table" is same value */ + + if (!info->xen_crash_info.com || + info->xen_crash_info.com->xen_major_version < 4) { + if (SYMBOL(xenheap_phys_end) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get the symbol of xenheap_phys_end.\n"); + return FALSE; + } + if (!readmem(VADDR_XEN, SYMBOL(xenheap_phys_end), &xen_end, + sizeof(xen_end))) { + ERRMSG("Can't get the value of xenheap_phys_end.\n"); + return FALSE; + } + if (SYMBOL(xen_pstart) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get the symbol of xen_pstart.\n"); + return FALSE; + } + if (!readmem(VADDR_XEN, SYMBOL(xen_pstart), &xen_start, + sizeof(xen_start))) { + ERRMSG("Can't get the value of xen_pstart.\n"); + return FALSE; + } + info->xen_heap_start = paddr_to_pfn(xen_start); + info->xen_heap_end = paddr_to_pfn(xen_end); + } + + return TRUE; +} + +int +get_xen_info_ia64(void) +{ + unsigned long xen_heap_start; + int i; + + if (SYMBOL(xen_heap_start) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get the symbol of xen_heap_start.\n"); + return FALSE; + } + if (!readmem(VADDR_XEN, SYMBOL(xen_heap_start), &xen_heap_start, + sizeof(xen_heap_start))) { + ERRMSG("Can't get the value of xen_heap_start.\n"); + return FALSE; + } + for (i = 0; i < info->num_domain; i++) { + info->domain_list[i].pickled_id = (unsigned int) + (info->domain_list[i].domain_addr - xen_heap_start); + } + + return TRUE; +} + +#endif /* ia64 
*/ + diff --git a/arch/ppc.c b/arch/ppc.c new file mode 100644 index 0000000..37c6a3b --- /dev/null +++ b/arch/ppc.c @@ -0,0 +1,122 @@ +/* + * ppc.c + * + * Created by: Suzuki K. Poulose <suzuki@in.ibm.com> + * - Based on ppc64 implementation + * Copyright (C) IBM Corporation, 2012. All rights reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifdef __powerpc32__ + +#include "../print_info.h" +#include "../elf_info.h" +#include "../makedumpfile.h" + +int +get_machdep_info_ppc(void) +{ + unsigned long vmlist, vmap_area_list, vmalloc_start; + + info->section_size_bits = _SECTION_SIZE_BITS; + info->max_physmem_bits = _MAX_PHYSMEM_BITS; + info->page_offset = __PAGE_OFFSET; + + if (SYMBOL(_stext) != NOT_FOUND_SYMBOL) + info->kernel_start = SYMBOL(_stext); + else { + ERRMSG("Can't get the symbol of _stext.\n"); + return FALSE; + } + + DEBUG_MSG("kernel_start : %lx\n", info->kernel_start); + + /* + * Get vmalloc_start value from either vmap_area_list or vmlist. 
+ */ + if ((SYMBOL(vmap_area_list) != NOT_FOUND_SYMBOL) + && (OFFSET(vmap_area.va_start) != NOT_FOUND_STRUCTURE) + && (OFFSET(vmap_area.list) != NOT_FOUND_STRUCTURE)) { + if (!readmem(VADDR, SYMBOL(vmap_area_list) + OFFSET(list_head.next), + &vmap_area_list, sizeof(vmap_area_list))) { + ERRMSG("Can't get vmap_area_list.\n"); + return FALSE; + } + if (!readmem(VADDR, vmap_area_list - OFFSET(vmap_area.list) + + OFFSET(vmap_area.va_start), &vmalloc_start, + sizeof(vmalloc_start))) { + ERRMSG("Can't get vmalloc_start.\n"); + return FALSE; + } + } else if ((SYMBOL(vmlist) != NOT_FOUND_SYMBOL) + && (OFFSET(vm_struct.addr) != NOT_FOUND_STRUCTURE)) { + if (!readmem(VADDR, SYMBOL(vmlist), &vmlist, sizeof(vmlist))) { + ERRMSG("Can't get vmlist.\n"); + return FALSE; + } + if (!readmem(VADDR, vmlist + OFFSET(vm_struct.addr), &vmalloc_start, + sizeof(vmalloc_start))) { + ERRMSG("Can't get vmalloc_start.\n"); + return FALSE; + } + } else { + /* + * For the compatibility, makedumpfile should run without the symbol + * vmlist and the offset of vm_struct.addr if they are not necessary. 
+ */ + return TRUE; + } + info->vmalloc_start = vmalloc_start; + DEBUG_MSG("vmalloc_start: %lx\n", vmalloc_start); + + return TRUE; +} + +int +is_vmalloc_addr_ppc(unsigned long vaddr) +{ + return (info->vmalloc_start && vaddr >= info->vmalloc_start); +} + +unsigned long long +vaddr_to_paddr_ppc(unsigned long vaddr) +{ + unsigned long *pgd, *pmd; + unsigned long long pte; + unsigned long long paddr; + + paddr = vaddr_to_paddr_general(vaddr); + if (paddr != NOT_PADDR) + return paddr; + + if (((SYMBOL(vmap_area_list) == NOT_FOUND_SYMBOL) + || (OFFSET(vmap_area.va_start) == NOT_FOUND_STRUCTURE) + || (OFFSET(vmap_area.list) == NOT_FOUND_STRUCTURE)) + && ((SYMBOL(vmlist) == NOT_FOUND_SYMBOL) + || (OFFSET(vm_struct.addr) == NOT_FOUND_STRUCTURE))) { + ERRMSG("Can't get necessary information for vmalloc translation.\n"); + return NOT_PADDR; + } + if (!is_vmalloc_addr_ppc(vaddr)) + return (vaddr - info->kernel_start); + + /* + * TODO: Support vmalloc translation. + */ + ERRMSG("This makedumpfile does not support vmalloc translation.\n"); + return NOT_PADDR; +} + +#endif /* powerpc32 */ diff --git a/arch/ppc64.c b/arch/ppc64.c new file mode 100644 index 0000000..8b6f7d5 --- /dev/null +++ b/arch/ppc64.c @@ -0,0 +1,671 @@ +/* + * ppc64.c + * + * Created by: Sachin Sant (sachinp@in.ibm.com) + * Copyright (C) IBM Corporation, 2006. All rights reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifdef __powerpc64__ + +#include "../print_info.h" +#include "../elf_info.h" +#include "../makedumpfile.h" +#include <endian.h> + +/* + * Swaps a 8 byte value + */ +static ulong swap64(ulong val, uint swap) +{ + if (swap) + return (((val & 0x00000000000000ffULL) << 56) | + ((val & 0x000000000000ff00ULL) << 40) | + ((val & 0x0000000000ff0000ULL) << 24) | + ((val & 0x00000000ff000000ULL) << 8) | + ((val & 0x000000ff00000000ULL) >> 8) | + ((val & 0x0000ff0000000000ULL) >> 24) | + ((val & 0x00ff000000000000ULL) >> 40) | + ((val & 0xff00000000000000ULL) >> 56)); + else + return val; +} + +/* + * Convert physical address to kernel virtual address + */ +static inline ulong paddr_to_vaddr_ppc64(ulong paddr) +{ + return (paddr + info->kernel_start); +} + +/* + * Convert the raw pgd entry to next pgtable adress + */ +static inline ulong pgd_page_vaddr_l4(ulong pgd) +{ + ulong pgd_val; + + pgd_val = (pgd & ~info->pgd_masked_bits); + if (info->kernel_version >= KERNEL_VERSION(4, 6, 0)) { + /* + * physical address is stored starting from kernel v4.6 + */ + pgd_val = paddr_to_vaddr_ppc64(pgd_val); + } + + return pgd_val; +} + +/* + * Convert the raw pud entry to next pgtable adress + */ +static inline ulong pud_page_vaddr_l4(ulong pud) +{ + ulong pud_val; + + pud_val = (pud & ~info->pud_masked_bits); + if (info->kernel_version >= KERNEL_VERSION(4, 6, 0)) { + /* + * physical address is stored starting from kernel v4.6 + */ + pud_val = paddr_to_vaddr_ppc64(pud_val); + } + + return pud_val; +} + +/* + * Convert the raw pmd entry to next pgtable adress + */ +static inline ulong pmd_page_vaddr_l4(ulong pmd) +{ + ulong pmd_val; + + pmd_val = (pmd & ~info->pmd_masked_bits); + if (info->kernel_version >= KERNEL_VERSION(4, 6, 0)) { + /* + * physical address is stored starting from 
kernel v4.6 + */ + pmd_val = paddr_to_vaddr_ppc64(pmd_val); + } + + return pmd_val; +} + +/* + * This function traverses vmemmap list to get the count of vmemmap regions + * and populates the regions' info in info->vmemmap_list[] + */ +static int +get_vmemmap_list_info(ulong head) +{ + int i, cnt; + long backing_size, virt_addr_offset, phys_offset, list_offset; + ulong curr, next; + char *vmemmap_buf = NULL; + + backing_size = SIZE(vmemmap_backing); + virt_addr_offset = OFFSET(vmemmap_backing.virt_addr); + phys_offset = OFFSET(vmemmap_backing.phys); + list_offset = OFFSET(vmemmap_backing.list); + info->vmemmap_list = NULL; + + /* + * Get list count by traversing the vmemmap list + */ + cnt = 0; + curr = head; + next = 0; + do { + if (!readmem(VADDR, (curr + list_offset), &next, + sizeof(next))) { + ERRMSG("Can't get vmemmap region addresses\n"); + goto err; + } + curr = next; + cnt++; + } while ((next != 0) && (next != head)); + + /* + * Using temporary buffer to save vmemmap region information + */ + vmemmap_buf = calloc(1, backing_size); + if (vmemmap_buf == NULL) { + ERRMSG("Can't allocate memory for vmemmap_buf. %s\n", + strerror(errno)); + goto err; + } + + info->vmemmap_list = calloc(1, cnt * sizeof(struct ppc64_vmemmap)); + if (info->vmemmap_list == NULL) { + ERRMSG("Can't allocate memory for vmemmap_list. 
%s\n", + strerror(errno)); + goto err; + } + + curr = head; + for (i = 0; i < cnt; i++) { + if (!readmem(VADDR, curr, vmemmap_buf, backing_size)) { + ERRMSG("Can't get vmemmap region info\n"); + goto err; + } + + info->vmemmap_list[i].phys = ULONG(vmemmap_buf + phys_offset); + info->vmemmap_list[i].virt = ULONG(vmemmap_buf + + virt_addr_offset); + curr = ULONG(vmemmap_buf + list_offset); + + if (info->vmemmap_list[i].virt < info->vmemmap_start) + info->vmemmap_start = info->vmemmap_list[i].virt; + + if ((info->vmemmap_list[i].virt + info->vmemmap_psize) > + info->vmemmap_end) + info->vmemmap_end = (info->vmemmap_list[i].virt + + info->vmemmap_psize); + } + + free(vmemmap_buf); + return cnt; +err: + free(vmemmap_buf); + free(info->vmemmap_list); + return 0; +} + +/* + * Verify that the kernel has made the vmemmap list available, + * and if so, stash the relevant data required to make vtop + * translations. + */ +static int +ppc64_vmemmap_init(void) +{ + int psize, shift; + ulong head; + + if ((SYMBOL(vmemmap_list) == NOT_FOUND_SYMBOL) + || (SYMBOL(mmu_psize_defs) == NOT_FOUND_SYMBOL) + || (SYMBOL(mmu_vmemmap_psize) == NOT_FOUND_SYMBOL) + || (SIZE(vmemmap_backing) == NOT_FOUND_STRUCTURE) + || (SIZE(mmu_psize_def) == NOT_FOUND_STRUCTURE) + || (OFFSET(mmu_psize_def.shift) == NOT_FOUND_STRUCTURE) + || (OFFSET(vmemmap_backing.phys) == NOT_FOUND_STRUCTURE) + || (OFFSET(vmemmap_backing.virt_addr) == NOT_FOUND_STRUCTURE) + || (OFFSET(vmemmap_backing.list) == NOT_FOUND_STRUCTURE)) + return FALSE; + + if (!readmem(VADDR, SYMBOL(mmu_vmemmap_psize), &psize, sizeof(int))) + return FALSE; + + if (!readmem(VADDR, SYMBOL(mmu_psize_defs) + + (SIZE(mmu_psize_def) * psize) + + OFFSET(mmu_psize_def.shift), &shift, sizeof(int))) + return FALSE; + info->vmemmap_psize = 1 << shift; + + if (!readmem(VADDR, SYMBOL(vmemmap_list), &head, sizeof(unsigned long))) + return FALSE; + + /* + * Get vmemmap list count and populate vmemmap regions info + */ + info->vmemmap_cnt = 
get_vmemmap_list_info(head); + if (info->vmemmap_cnt == 0) + return FALSE; + + info->flag_vmemmap = TRUE; + return TRUE; +} + +static int +ppc64_vmalloc_init(void) +{ + if (info->page_size == 65536) { + /* + * 64K pagesize + */ + if (info->cur_mmu_type & RADIX_MMU) { + info->l1_index_size = PTE_INDEX_SIZE_RADIX_64K; + info->l2_index_size = PMD_INDEX_SIZE_RADIX_64K; + info->l3_index_size = PUD_INDEX_SIZE_RADIX_64K; + info->l4_index_size = PGD_INDEX_SIZE_RADIX_64K; + + } else if (info->kernel_version >= KERNEL_VERSION(4, 6, 0)) { + info->l1_index_size = PTE_INDEX_SIZE_L4_64K_3_10; + + if (info->kernel_version >= KERNEL_VERSION(4, 12, 0)) { + info->l2_index_size = PMD_INDEX_SIZE_L4_64K_4_12; + if (info->kernel_version >= KERNEL_VERSION(4, 17, 0)) + info->l3_index_size = PUD_INDEX_SIZE_L4_64K_4_17; + else + info->l3_index_size = PUD_INDEX_SIZE_L4_64K_4_12; + info->l4_index_size = PGD_INDEX_SIZE_L4_64K_4_12; + } else { + info->l2_index_size = PMD_INDEX_SIZE_L4_64K_4_6; + info->l3_index_size = PUD_INDEX_SIZE_L4_64K_4_6; + info->l4_index_size = PGD_INDEX_SIZE_L4_64K_3_10; + } + } else if (info->kernel_version >= KERNEL_VERSION(3, 10, 0)) { + info->l1_index_size = PTE_INDEX_SIZE_L4_64K_3_10; + info->l2_index_size = PMD_INDEX_SIZE_L4_64K_3_10; + info->l3_index_size = PUD_INDEX_SIZE_L4_64K; + info->l4_index_size = PGD_INDEX_SIZE_L4_64K_3_10; + } else { + info->l1_index_size = PTE_INDEX_SIZE_L4_64K; + info->l2_index_size = PMD_INDEX_SIZE_L4_64K; + info->l3_index_size = PUD_INDEX_SIZE_L4_64K; + info->l4_index_size = PGD_INDEX_SIZE_L4_64K; + } + + info->pte_rpn_shift = (SYMBOL(demote_segment_4k) ? 
+ PTE_RPN_SHIFT_L4_64K_V2 : PTE_RPN_SHIFT_L4_64K_V1); + + if (info->kernel_version >= KERNEL_VERSION(4, 6, 0)) { + info->pgd_masked_bits = PGD_MASKED_BITS_64K_4_6; + info->pud_masked_bits = PUD_MASKED_BITS_64K_4_6; + info->pmd_masked_bits = PMD_MASKED_BITS_64K_4_6; + } else { + info->pgd_masked_bits = PGD_MASKED_BITS_64K; + info->pud_masked_bits = PUD_MASKED_BITS_64K; + info->pmd_masked_bits = (info->kernel_version >= KERNEL_VERSION(3, 11, 0) ? + PMD_MASKED_BITS_64K_3_11 : PMD_MASKED_BITS_64K); + } + } else { + /* + * 4K pagesize + */ + if (info->cur_mmu_type & RADIX_MMU) { + info->l1_index_size = PTE_INDEX_SIZE_RADIX_4K; + info->l2_index_size = PMD_INDEX_SIZE_RADIX_4K; + info->l3_index_size = PUD_INDEX_SIZE_RADIX_4K; + info->l4_index_size = PGD_INDEX_SIZE_RADIX_4K; + + } else { + info->l1_index_size = PTE_INDEX_SIZE_L4_4K; + info->l2_index_size = PMD_INDEX_SIZE_L4_4K; + info->l3_index_size = (info->kernel_version >= KERNEL_VERSION(3, 7, 0) ? + PUD_INDEX_SIZE_L4_4K_3_7 : PUD_INDEX_SIZE_L4_4K); + info->l4_index_size = PGD_INDEX_SIZE_L4_4K; + } + + info->pte_rpn_shift = (info->kernel_version >= KERNEL_VERSION(4, 5, 0) ? 
+ PTE_RPN_SHIFT_L4_4K_4_5 : PTE_RPN_SHIFT_L4_4K); + + info->pgd_masked_bits = PGD_MASKED_BITS_4K; + info->pud_masked_bits = PUD_MASKED_BITS_4K; + info->pmd_masked_bits = PMD_MASKED_BITS_4K; + } + + if (info->kernel_version >= KERNEL_VERSION(4, 7, 0)) { + info->pgd_masked_bits = PGD_MASKED_BITS_4_7; + info->pud_masked_bits = PUD_MASKED_BITS_4_7; + info->pmd_masked_bits = PMD_MASKED_BITS_4_7; + } + + info->pte_rpn_mask = PTE_RPN_MASK_DEFAULT; + if ((info->kernel_version >= KERNEL_VERSION(4, 6, 0)) && + (info->kernel_version < KERNEL_VERSION(4, 11, 0))) { + info->pte_rpn_mask = PTE_RPN_MASK_L4_4_6; + info->pte_rpn_shift = PTE_RPN_SHIFT_L4_4_6; + } + + if (info->kernel_version >= KERNEL_VERSION(4, 11, 0)) { + info->pte_rpn_mask = PTE_RPN_MASK_L4_4_11; + info->pte_rpn_shift = PTE_RPN_SHIFT_L4_4_11; + } + + /* + * Compute ptrs per each level + */ + info->l1_shift = info->page_shift; + info->ptrs_per_l1 = (1 << info->l1_index_size); + info->ptrs_per_l2 = (1 << info->l2_index_size); + info->ptrs_per_l3 = (1 << info->l3_index_size); + info->ptrs_per_l4 = (1 << info->l4_index_size); + info->ptrs_per_pgd = info->ptrs_per_l4; + + /* + * Compute shifts + */ + info->l2_shift = info->l1_shift + info->l1_index_size; + info->l3_shift = info->l2_shift + info->l2_index_size; + info->l4_shift = info->l3_shift + info->l3_index_size; + + return TRUE; +} + +/* + * If the vmemmap address translation information is stored in the kernel, + * make the translation. 
+ */ +static unsigned long long +ppc64_vmemmap_to_phys(unsigned long vaddr) +{ + int i; + ulong offset; + unsigned long long paddr = NOT_PADDR; + + for (i = 0; i < info->vmemmap_cnt; i++) { + if ((vaddr >= info->vmemmap_list[i].virt) && (vaddr < + (info->vmemmap_list[i].virt + info->vmemmap_psize))) { + offset = vaddr - info->vmemmap_list[i].virt; + paddr = info->vmemmap_list[i].phys + offset; + break; + } + } + + return paddr; +} + +static unsigned long long +ppc64_vtop_level4(unsigned long vaddr) +{ + ulong *level4; + ulong *pgdir, *page_upper; + ulong *page_middle, *page_table; + unsigned long long pgd_pte, pud_pte; + unsigned long long pmd_pte, pte; + unsigned long long paddr = NOT_PADDR; + uint swap = 0; + + if (info->page_buf == NULL) { + /* + * This is the first vmalloc address translation request + */ + info->page_buf = (char *)calloc(1, PAGESIZE()); + if (info->page_buf == NULL) { + ERRMSG("Can't allocate memory to read page tables. %s\n", + strerror(errno)); + return NOT_PADDR; + } + } + + if (info->kernel_version >= KERNEL_VERSION(4, 7, 0)) { + /* + * Starting with kernel v4.7, page table entries are always + * big endian on server processors. Set this flag if + * kernel is not big endian. 
+ */ + if (__BYTE_ORDER == __LITTLE_ENDIAN) + swap = 1; + } + + level4 = (ulong *)info->kernel_pgd; + pgdir = (ulong *)((ulong *)level4 + PGD_OFFSET_L4(vaddr)); + if (!readmem(VADDR, PAGEBASE(level4), info->page_buf, PAGESIZE())) { + ERRMSG("Can't read PGD page: 0x%llx\n", PAGEBASE(level4)); + return NOT_PADDR; + } + pgd_pte = swap64(ULONG((info->page_buf + PAGEOFFSET(pgdir))), swap); + if (!pgd_pte) + return NOT_PADDR; + + /* + * Sometimes we don't have level3 pagetable entries + */ + if (info->l3_index_size != 0) { + pgd_pte = pgd_page_vaddr_l4(pgd_pte); + page_upper = (ulong *)((ulong *)pgd_pte + PUD_OFFSET_L4(vaddr)); + if (!readmem(VADDR, PAGEBASE(pgd_pte), info->page_buf, PAGESIZE())) { + ERRMSG("Can't read PUD page: 0x%llx\n", PAGEBASE(pgd_pte)); + return NOT_PADDR; + } + pud_pte = swap64(ULONG((info->page_buf + PAGEOFFSET(page_upper))), swap); + if (!pud_pte) + return NOT_PADDR; + } else { + pud_pte = pgd_pte; + } + + pud_pte = pud_page_vaddr_l4(pud_pte); + page_middle = (ulong *)((ulong *)pud_pte + PMD_OFFSET_L4(vaddr)); + if (!readmem(VADDR, PAGEBASE(pud_pte), info->page_buf, PAGESIZE())) { + ERRMSG("Can't read PMD page: 0x%llx\n", PAGEBASE(pud_pte)); + return NOT_PADDR; + } + pmd_pte = swap64(ULONG((info->page_buf + PAGEOFFSET(page_middle))), swap); + if (!(pmd_pte)) + return NOT_PADDR; + + pmd_pte = pmd_page_vaddr_l4(pmd_pte); + page_table = (ulong *)(pmd_pte) + + (BTOP(vaddr) & (info->ptrs_per_l1 - 1)); + if (!readmem(VADDR, PAGEBASE(pmd_pte), info->page_buf, PAGESIZE())) { + ERRMSG("Can't read page table: 0x%llx\n", PAGEBASE(pmd_pte)); + return NOT_PADDR; + } + pte = swap64(ULONG((info->page_buf + PAGEOFFSET(page_table))), swap); + if (!(pte & _PAGE_PRESENT)) { + ERRMSG("Page not present!\n"); + return NOT_PADDR; + } + + if (!pte) + return NOT_PADDR; + + paddr = PAGEBASE(PTOB((pte & info->pte_rpn_mask) >> info->pte_rpn_shift)) + + PAGEOFFSET(vaddr); + + return paddr; +} + +int +set_ppc64_max_physmem_bits(void) +{ + long array_len = 
ARRAY_LENGTH(mem_section); + /* + * The older ppc64 kernels uses _MAX_PHYSMEM_BITS as 42 and the + * newer kernels 3.7 onwards uses 46 bits. + */ + + info->max_physmem_bits = _MAX_PHYSMEM_BITS_ORIG ; + if ((array_len == (NR_MEM_SECTIONS() / _SECTIONS_PER_ROOT_EXTREME())) + || (array_len == (NR_MEM_SECTIONS() / _SECTIONS_PER_ROOT()))) + return TRUE; + + info->max_physmem_bits = _MAX_PHYSMEM_BITS_3_7; + if ((array_len == (NR_MEM_SECTIONS() / _SECTIONS_PER_ROOT_EXTREME())) + || (array_len == (NR_MEM_SECTIONS() / _SECTIONS_PER_ROOT()))) + return TRUE; + + return FALSE; +} + +int +get_machdep_info_ppc64(void) +{ + unsigned long vmlist, vmap_area_list, vmalloc_start; + + info->section_size_bits = _SECTION_SIZE_BITS; + if (!set_ppc64_max_physmem_bits()) { + ERRMSG("Can't detect max_physmem_bits.\n"); + return FALSE; + } + info->page_offset = __PAGE_OFFSET; + + if (SYMBOL(_stext) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get the symbol of _stext.\n"); + return FALSE; + } + info->kernel_start = SYMBOL(_stext); + DEBUG_MSG("kernel_start : %lx\n", info->kernel_start); + + /* + * Get vmalloc_start value from either vmap_area_list or vmlist. 
+ */ + if ((SYMBOL(vmap_area_list) != NOT_FOUND_SYMBOL) + && (OFFSET(vmap_area.va_start) != NOT_FOUND_STRUCTURE) + && (OFFSET(vmap_area.list) != NOT_FOUND_STRUCTURE)) { + if (!readmem(VADDR, SYMBOL(vmap_area_list) + OFFSET(list_head.next), + &vmap_area_list, sizeof(vmap_area_list))) { + ERRMSG("Can't get vmap_area_list.\n"); + return FALSE; + } + if (!readmem(VADDR, vmap_area_list - OFFSET(vmap_area.list) + + OFFSET(vmap_area.va_start), &vmalloc_start, + sizeof(vmalloc_start))) { + ERRMSG("Can't get vmalloc_start.\n"); + return FALSE; + } + } else if ((SYMBOL(vmlist) != NOT_FOUND_SYMBOL) + && (OFFSET(vm_struct.addr) != NOT_FOUND_STRUCTURE)) { + if (!readmem(VADDR, SYMBOL(vmlist), &vmlist, sizeof(vmlist))) { + ERRMSG("Can't get vmlist.\n"); + return FALSE; + } + if (!readmem(VADDR, vmlist + OFFSET(vm_struct.addr), &vmalloc_start, + sizeof(vmalloc_start))) { + ERRMSG("Can't get vmalloc_start.\n"); + return FALSE; + } + } else { + /* + * For the compatibility, makedumpfile should run without the symbol + * vmlist and the offset of vm_struct.addr if they are not necessary. 
+ */ + return TRUE; + } + info->vmalloc_start = vmalloc_start; + DEBUG_MSG("vmalloc_start: %lx\n", vmalloc_start); + + if (SYMBOL(swapper_pg_dir) != NOT_FOUND_SYMBOL) { + info->kernel_pgd = SYMBOL(swapper_pg_dir); + } else if (SYMBOL(cpu_pgd) != NOT_FOUND_SYMBOL) { + info->kernel_pgd = SYMBOL(cpu_pgd); + } else { + ERRMSG("No swapper_pg_dir or cpu_pgd symbols exist\n"); + return FALSE; + } + + if (SYMBOL(vmemmap_list) != NOT_FOUND_SYMBOL) { + info->vmemmap_start = VMEMMAP_REGION_ID << REGION_SHIFT; + info->vmemmap_end = info->vmemmap_start; + if (ppc64_vmemmap_init() == FALSE) { + ERRMSG("Can't get vmemmap list info.\n"); + return FALSE; + } + DEBUG_MSG("vmemmap_start: %lx\n", info->vmemmap_start); + } + + return TRUE; +} + +int +get_versiondep_info_ppc64() +{ + unsigned long cur_cpu_spec; + uint mmu_features; + + /* + * On PowerISA 3.0 based server processors, a kernel can run with + * radix MMU or standard MMU. Get the current MMU type. + */ + info->cur_mmu_type = STD_MMU; + if ((SYMBOL(cur_cpu_spec) != NOT_FOUND_SYMBOL) + && (OFFSET(cpu_spec.mmu_features) != NOT_FOUND_STRUCTURE)) { + if (readmem(VADDR, SYMBOL(cur_cpu_spec), &cur_cpu_spec, + sizeof(cur_cpu_spec))) { + if (readmem(VADDR, cur_cpu_spec + OFFSET(cpu_spec.mmu_features), + &mmu_features, sizeof(mmu_features))) + info->cur_mmu_type = mmu_features & RADIX_MMU; + } + } + + /* + * Initialize Linux page table info + */ + if (ppc64_vmalloc_init() == FALSE) { + ERRMSG("Can't initialize for vmalloc translation\n"); + return FALSE; + } + info->page_offset = __PAGE_OFFSET; + + return TRUE; +} + +int +is_vmalloc_addr_ppc64(unsigned long vaddr) +{ + return (info->vmalloc_start && vaddr >= info->vmalloc_start); +} + +unsigned long long +vaddr_to_paddr_ppc64(unsigned long vaddr) +{ + unsigned long long paddr; + + if ((info->flag_vmemmap) + && (vaddr >= info->vmemmap_start)) { + return ppc64_vmemmap_to_phys(vaddr); + } + + paddr = vaddr_to_paddr_general(vaddr); + if (paddr != NOT_PADDR) + return paddr; + + if 
(!is_vmalloc_addr_ppc64(vaddr)) + return (vaddr - info->kernel_start); + + if ((SYMBOL(vmap_area_list) == NOT_FOUND_SYMBOL) + || (OFFSET(vmap_area.va_start) == NOT_FOUND_STRUCTURE) + || (OFFSET(vmap_area.list) == NOT_FOUND_STRUCTURE)) { + if ((SYMBOL(vmlist) == NOT_FOUND_SYMBOL) + || (OFFSET(vm_struct.addr) == NOT_FOUND_STRUCTURE)) { + ERRMSG("Can't get info for vmalloc translation.\n"); + return NOT_PADDR; + } + } + + return ppc64_vtop_level4(vaddr); +} + +int arch_crashkernel_mem_size_ppc64() +{ + const char f_crashsize[] = "/proc/device-tree/chosen/linux,crashkernel-size"; + const char f_crashbase[] = "/proc/device-tree/chosen/linux,crashkernel-base"; + unsigned long crashk_sz_be, crashk_sz; + unsigned long crashk_base_be, crashk_base; + uint swap; + FILE *fp, *fpb; + + fp = fopen(f_crashsize, "r"); + if (!fp) { + ERRMSG("Cannot open %s\n", f_crashsize); + return FALSE; + } + fpb = fopen(f_crashbase, "r"); + if (!fp) { + ERRMSG("Cannot open %s\n", f_crashbase); + fclose(fp); + return FALSE; + } + + fread(&crashk_sz_be, sizeof(crashk_sz_be), 1, fp); + fread(&crashk_base_be, sizeof(crashk_base_be), 1, fpb); + fclose(fp); + fclose(fpb); + /* dev tree is always big endian */ + swap = !is_bigendian(); + crashk_sz = swap64(crashk_sz_be, swap); + crashk_base = swap64(crashk_base_be, swap); + crash_reserved_mem_nr = 1; + crash_reserved_mem[0].start = crashk_base; + crash_reserved_mem[0].end = crashk_base + crashk_sz - 1; + + return TRUE; +} + +#endif /* powerpc64 */ diff --git a/arch/s390x.c b/arch/s390x.c new file mode 100644 index 0000000..bf9d58e --- /dev/null +++ b/arch/s390x.c @@ -0,0 +1,338 @@ +/* + * s390x.c + * + * Created by: Michael Holzheu (holzheu@de.ibm.com) + * Copyright IBM Corp. 2010 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifdef __s390x__ + +#include "../print_info.h" +#include "../elf_info.h" +#include "../makedumpfile.h" + +#define TABLE_SIZE 4096 + +/* + * Bits in the virtual address + * + * |<----- RX ---------->| + * | RFX | RSX | RTX | SX | PX | BX | + * 0 11 22 33 44 52 63 + * + * RX: Region Index + * RFX: Region first index + * RSX: Region second index + * RTX: Region third index + * SX: Segment index + * PX: Page index + * BX: Byte index + * + * RX part of vaddr is divided into three fields RFX, RSX and RTX each of + * 11 bit in size + */ +#define _REGION_INDEX_SHIFT 11 +#define _PAGE_INDEX_MASK 0xff000UL /* page index (PX) mask */ +#define _BYTE_INDEX_MASK 0x00fffUL /* Byte index (BX) mask */ +#define _PAGE_BYTE_INDEX_MASK (_PAGE_INDEX_MASK | _BYTE_INDEX_MASK) + +/* Region/segment table index */ +#define rsg_index(x, y) \ + (((x) >> ((_REGION_INDEX_SHIFT * y) + _SEGMENT_INDEX_SHIFT)) \ + & _REGION_OFFSET_MASK) +/* Page table index */ +#define pte_index(x) (((x) >> _PAGE_INDEX_SHIFT) & _PAGE_OFFSET_MASK) + +#define rsg_offset(x, y) (rsg_index( x, y) * sizeof(unsigned long)) +#define pte_offset(x) (pte_index(x) * sizeof(unsigned long)) + +int +set_s390x_max_physmem_bits(void) +{ + long array_len = ARRAY_LENGTH(mem_section); + /* + * The older s390x kernels uses _MAX_PHYSMEM_BITS as 42 and the + * newer kernels uses 46 bits. 
+ */ + + info->max_physmem_bits = _MAX_PHYSMEM_BITS_ORIG ; + if ((array_len == (NR_MEM_SECTIONS() / _SECTIONS_PER_ROOT_EXTREME())) + || (array_len == (NR_MEM_SECTIONS() / _SECTIONS_PER_ROOT()))) + return TRUE; + + info->max_physmem_bits = _MAX_PHYSMEM_BITS_3_3; + if ((array_len == (NR_MEM_SECTIONS() / _SECTIONS_PER_ROOT_EXTREME())) + || (array_len == (NR_MEM_SECTIONS() / _SECTIONS_PER_ROOT()))) + return TRUE; + + return FALSE; +} + +int +get_machdep_info_s390x(void) +{ + unsigned long vmalloc_start; + char *term_str = getenv("TERM"); + + if (term_str && strcmp(term_str, "dumb") == 0) + /* '\r' control character is ignored on "dumb" terminal. */ + flag_ignore_r_char = 1; + + info->section_size_bits = _SECTION_SIZE_BITS; + if (!set_s390x_max_physmem_bits()) { + ERRMSG("Can't detect max_physmem_bits.\n"); + return FALSE; + } + info->page_offset = __PAGE_OFFSET; + + if (SYMBOL(_stext) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get the symbol of _stext.\n"); + return FALSE; + } + info->kernel_start = SYMBOL(_stext); + DEBUG_MSG("kernel_start : %lx\n", info->kernel_start); + + /* + * Obtain the vmalloc_start address from high_memory symbol. 
+ */ + if (SYMBOL(high_memory) == NOT_FOUND_SYMBOL) { + return TRUE; + } + if (!readmem(VADDR, SYMBOL(high_memory), &vmalloc_start, + sizeof(vmalloc_start))) { + ERRMSG("Can't get vmalloc_start.\n"); + return FALSE; + } + info->vmalloc_start = vmalloc_start; + DEBUG_MSG("vmalloc_start: %lx\n", vmalloc_start); + + return TRUE; +} + +static int +is_vmalloc_addr_s390x(unsigned long vaddr) +{ + return (info->vmalloc_start && vaddr >= info->vmalloc_start); +} + +static int +rsg_table_entry_bad(unsigned long entry, int level) +{ + unsigned long mask = ~_REGION_ENTRY_INVALID + & ~_REGION_ENTRY_TYPE_MASK + & ~_REGION_ENTRY_LENGTH + & ~_SEGMENT_ENTRY_LARGE + & ~_SEGMENT_ENTRY_CO; + + if (level) + mask &= ~_REGION_ENTRY_ORIGIN; + else + mask &= ~_SEGMENT_ENTRY_ORIGIN; + + return (entry & mask) != 0; +} + +/* Region or segment table traversal function */ +static unsigned long +_kl_rsg_table_deref_s390x(unsigned long vaddr, unsigned long table, + int len, int level) +{ + unsigned long offset, entry; + + offset = rsg_offset(vaddr, level); + + /* check if offset is over the table limit. */ + if (offset >= ((len + 1) * TABLE_SIZE)) { + ERRMSG("offset is over the table limit.\n"); + return 0; + } + + if (!readmem(VADDR, table + offset, &entry, sizeof(entry))) { + if (level) + ERRMSG("Can't read region table %d entry\n", level); + else + ERRMSG("Can't read segment table entry\n"); + return 0; + } + /* + * Check if the segment table entry could be read and doesn't have + * any of the reserved bits set. + */ + if (rsg_table_entry_bad(entry, level)) { + ERRMSG("Bad region/segment table entry.\n"); + return 0; + } + /* + * Check if the region/segment table entry is with valid + * level and not invalid. 
+ */ + if ((RSG_TABLE_LEVEL(entry) != level) + && (entry & _REGION_ENTRY_INVALID)) { + ERRMSG("Invalid region/segment table level or entry.\n"); + return 0; + } + + return entry; +} + +/* Page table traversal function */ +static ulong _kl_pg_table_deref_s390x(unsigned long vaddr, unsigned long table) +{ + unsigned long offset, entry; + + offset = pte_offset(vaddr); + readmem(VADDR, table + offset, &entry, sizeof(entry)); + /* + * Check if the page table entry could be read and doesn't have + * the reserved bit set. + * Check if the page table entry has the invalid bit set. + */ + if (entry & (_PAGE_ZERO | _PAGE_INVALID)) { + ERRMSG("Invalid page table entry.\n"); + return 0; + } + + return entry; +} + +/* vtop_s390x() - translate virtual address to physical + * @vaddr: virtual address to translate + * + * Function converts the @vaddr into physical address using page tables. + * + * Return: + * Physical address or NOT_PADDR if translation fails. + */ +static unsigned long long +vtop_s390x(unsigned long vaddr) +{ + unsigned long long paddr = NOT_PADDR; + unsigned long table, entry; + int level, len; + + if (SYMBOL(swapper_pg_dir) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get the symbol of swapper_pg_dir.\n"); + return NOT_PADDR; + } + table = SYMBOL(swapper_pg_dir); + + /* Read the first entry to find the number of page table levels. */ + readmem(VADDR, table, &entry, sizeof(entry)); + level = TABLE_LEVEL(entry); + len = TABLE_LENGTH(entry); + + if ((vaddr >> (_SEGMENT_PAGE_SHIFT + (_REGION_INDEX_SHIFT * level)))) { + ERRMSG("Address too big for the number of page table " \ + "levels.\n"); + return NOT_PADDR; + } + + /* + * Walk the region and segment tables. 
+ */ + while (level >= 0) { + entry = _kl_rsg_table_deref_s390x(vaddr, table, len, level); + if (!entry) { + return NOT_PADDR; + } + table = entry & _REGION_ENTRY_ORIGIN; + if ((entry & _REGION_ENTRY_LARGE) && (level == 1)) { + table &= ~0x7fffffffUL; + paddr = table + (vaddr & 0x7fffffffUL); + return paddr; + } + len = RSG_TABLE_LENGTH(entry); + level--; + } + + /* + * Check if this is a large page. + * if yes, then add the 1MB page offset (PX + BX) and return the value. + * if no, then get the page table entry using PX index. + */ + if (entry & _SEGMENT_ENTRY_LARGE) { + table &= ~_PAGE_BYTE_INDEX_MASK; + paddr = table + (vaddr & _PAGE_BYTE_INDEX_MASK); + } else { + entry = _kl_pg_table_deref_s390x(vaddr, + entry & _SEGMENT_ENTRY_ORIGIN); + if (!entry) + return NOT_PADDR; + + /* + * Isolate the page origin from the page table entry. + * Add the page offset (BX). + */ + paddr = (entry & _REGION_ENTRY_ORIGIN) + + (vaddr & _BYTE_INDEX_MASK); + } + + return paddr; +} + +unsigned long long +vaddr_to_paddr_s390x(unsigned long vaddr) +{ + unsigned long long paddr; + + paddr = vaddr_to_paddr_general(vaddr); + if (paddr != NOT_PADDR) + return paddr; + + if (SYMBOL(high_memory) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get necessary information for vmalloc " + "translation.\n"); + return NOT_PADDR; + } + + if (is_vmalloc_addr_s390x(vaddr)) { + paddr = vtop_s390x(vaddr); + } + else { + paddr = vaddr - KVBASE; + } + + return paddr; +} + +struct addr_check { + unsigned long addr; + int found; +}; + +static int phys_addr_callback(void *data, int nr, char *str, + unsigned long base, unsigned long length) +{ + struct addr_check *addr_check = data; + unsigned long addr = addr_check->addr; + + if (addr >= base && addr < base + length) { + addr_check->found = 1; + return -1; + } + return 0; +} + +int is_iomem_phys_addr_s390x(unsigned long addr) +{ + /* Implicit VtoP conversion will be performed for addr here. 
*/ + struct addr_check addr_check = {addr, 0}; + + iomem_for_each_line("System RAM\n", phys_addr_callback, &addr_check); + return addr_check.found; +} + +#endif /* __s390x__ */ diff --git a/arch/sparc64.c b/arch/sparc64.c new file mode 100644 index 0000000..1cfaa85 --- /dev/null +++ b/arch/sparc64.c @@ -0,0 +1,236 @@ +/* + * Copyright (C) 2014, 2017 Oracle and/or its affiliates + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation (version 2 of the License). + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifdef __sparc64__ + +#include "../elf_info.h" +#include "../makedumpfile.h" +#include "../print_info.h" + +int get_versiondep_info_sparc64(void) +{ + info->section_size_bits = _SECTION_SIZE_BITS; + + if (info->kernel_version >= KERNEL_VERSION(3, 8, 13)) + info->max_physmem_bits = _MAX_PHYSMEM_BITS_L4; + else { + info->max_physmem_bits = _MAX_PHYSMEM_BITS_L3; + info->flag_vmemmap = TRUE; + info->vmemmap_start = VMEMMAP_BASE_SPARC64; + info->vmemmap_end = VMEMMAP_BASE_SPARC64 + + ((1UL << (info->max_physmem_bits - PAGE_SHIFT)) * + SIZE(page)); + } + + return TRUE; +} + +int get_phys_base_sparc64(void) +{ + /* Ideally we'd search the pt_load entries until we found one + * containing KVBASE (_stext), but get_symbol_info hasn't been + * called yet. We'll just go with the first entry. 
+ */ + unsigned long long phys_start; + unsigned long long virt_start; + unsigned long long virt_end; + + if (get_pt_load(0, &phys_start, NULL, &virt_start, &virt_end)) { + info->phys_base = phys_start & ~KVBASE_MASK; + return TRUE; + } + ERRMSG("Can't find kernel segment\n"); + return FALSE; +} + +int is_vmalloc_addr_sparc64(unsigned long vaddr) +{ + return (vaddr >= VMALLOC_START_SPARC64); +} + +int is_vmemmap_addr_sparc64(unsigned long vaddr) +{ + if (info->flag_vmemmap && + (vaddr >= info->vmemmap_start) && (vaddr < info->vmemmap_end)) + return TRUE; + + return FALSE; +} + +unsigned long vmemmap_to_phys_sparc64(unsigned long vaddr) +{ + unsigned long vmemmap_table; + unsigned long offset = vaddr - info->vmemmap_start; + unsigned long chunk_offset = offset & ~VMEMMAP_CHUNK_MASK; + unsigned long chunk; + unsigned long index; + unsigned long pte; + unsigned long pte_paddr; + unsigned long pte_offset; + + vmemmap_table = SYMBOL(vmemmap_table); + if (vmemmap_table == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get symbol of vmemmap_table\n"); + return NOT_PADDR; + } + + index = offset >> NR_CHUNKS_SHIFT; + if (!readmem(VADDR, vmemmap_table + (index * sizeof(long)), + &pte_paddr, sizeof(long))) { + ERRMSG("Error reading 1st level vmemmap_table\n"); + return NOT_PADDR; + } + chunk = (vaddr & ~NR_CHUNKS_MASK) >> VMEMMAP_CHUNK_SHIFT; + pte_offset = chunk * sizeof(pte); + pte_paddr += pte_offset; + if (!readmem(PADDR, pte_paddr, &pte, sizeof(pte))) { + ERRMSG("Error reading 2nd level vmemmap_table\n"); + return NOT_PADDR; + } + return pte_to_pa(pte) | chunk_offset; +} + +unsigned long vtop3_sparc64(unsigned long vaddr) +{ + unsigned long pgdir, pgd_paddr, pmd_paddr, pte_paddr; + unsigned long pgd_pte, pmd_pte, pte; + + pgdir = SYMBOL(swapper_pg_dir); + if (pgdir == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get symbol of swapper_pg_dir\n"); + return NOT_PADDR; + } + + pgd_paddr = pgd_offset_l3(pgdir, vaddr); + if (!readmem(VADDR, pgd_paddr, &pgd_pte, sizeof pgd_pte)) { + ERRMSG("Can't 
get pgd_pte, pgd_paddr = 0x%lx\n", pgd_paddr); + return NOT_PADDR; + } + if (pgd_none(pgd_pte)) { + ERRMSG("Can't get a valid pgd_pte.\n"); + return NOT_PADDR; + } + + pmd_paddr = pmd_offset(pgd_pte, vaddr); + if (!readmem(PADDR, pmd_paddr, &pmd_pte, sizeof pmd_pte)) { + ERRMSG("Can't get pmd_pte, pmd_paddr = 0x%lx\n", pmd_paddr); + return NOT_PADDR; + } + if (pmd_none(pmd_pte)) { + ERRMSG("Can't get a valid pmd_pte.\n"); + return NOT_PADDR; + } + + if (pmd_large(pmd_pte)) + return pte_to_pa(pmd_pte) + (vaddr & ~PMD_MASK); + + pte_paddr = pte_offset(pmd_pte, vaddr); + if (!readmem(PADDR, pte_paddr, &pte, sizeof pte)) { + ERRMSG("Can't get pte, pte_paddr = 0x%lx\n", pmd_paddr); + return NOT_PADDR; + } + if (!pte_present(pte)) { + ERRMSG("Can't get a valid pte.\n"); + return NOT_PADDR; + } + + return pte_to_pa(pte) + (vaddr & ~PAGE_MASK); +} + +unsigned long vtop4_sparc64(unsigned long vaddr) +{ + unsigned long pgdir, pgd_paddr, pud_paddr, pmd_paddr, pte_paddr; + unsigned long pgd_pte, pud_pte, pmd_pte, pte; + + pgdir = SYMBOL(swapper_pg_dir); + if (pgdir == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get symbol of swapper_pg_dir\n"); + return NOT_PADDR; + } + + pgd_paddr = pgd_offset_l4(pgdir, vaddr); + if (!readmem(VADDR, pgd_paddr, &pgd_pte, sizeof pgd_pte)) { + ERRMSG("Can't get pgd_pte, pgd_paddr = 0x%lx\n", pgd_paddr); + return NOT_PADDR; + } + if (pgd_none(pgd_pte)) { + ERRMSG("Can't get a valid pgd_pte.\n"); + return NOT_PADDR; + } + + pud_paddr = pud_offset(pgd_pte, vaddr); + if (!readmem(PADDR, pud_paddr, &pud_pte, sizeof pud_pte)) { + ERRMSG("Can't get pud_pte, pud_paddr = 0x%lx\n", pud_paddr); + return NOT_PADDR; + } + if (pud_none(pud_pte)) { + ERRMSG("Can't get a valid pud_pte.\n"); + return NOT_PADDR; + } + + if (pud_large(pud_pte)) + return pte_to_pa(pud_pte) + (vaddr & ~PUD_MASK); + + pmd_paddr = pmd_offset(pud_pte, vaddr); + if (!readmem(PADDR, pmd_paddr, &pmd_pte, sizeof pmd_pte)) { + ERRMSG("Can't get pmd_pte, pmd_paddr = 0x%lx\n", pmd_paddr); + return 
NOT_PADDR; + } + if (pmd_none(pmd_pte)) { + ERRMSG("Can't get a valid pmd_pte.\n"); + return NOT_PADDR; + } + + if (pmd_large(pmd_pte)) + return pte_to_pa(pmd_pte) + (vaddr & ~PMD_MASK); + + pte_paddr = pte_offset(pmd_pte, vaddr); + if (!readmem(PADDR, pte_paddr, &pte, sizeof pte)) { + ERRMSG("Can't get pte, pte_paddr = 0x%lx\n", pmd_paddr); + return NOT_PADDR; + } + if (!pte_present(pte)) { + ERRMSG("Can't get a valid pte.\n"); + return NOT_PADDR; + } + + return pte_to_pa(pte) + (vaddr & ~PAGE_MASK); +} + +unsigned long long vaddr_to_paddr_sparc64(unsigned long vaddr) +{ + unsigned long paddr; + + paddr = vaddr_to_paddr_general(vaddr); + if (paddr != NOT_PADDR) + return paddr; + + if (is_vmemmap_addr_sparc64(vaddr)) + paddr = vmemmap_to_phys_sparc64(vaddr); + else if (is_vmalloc_addr_sparc64(vaddr)) { + if (info->kernel_version >= KERNEL_VERSION(3, 8, 13)) + paddr = vtop4_sparc64(vaddr); + else + paddr = vtop3_sparc64(vaddr); + } + if (paddr == NOT_PADDR) + ERRMSG("vaddr not mapped: 0x%lx\n", vaddr); + + return paddr; +} + +#endif /* sparc64 */ diff --git a/arch/x86.c b/arch/x86.c new file mode 100644 index 0000000..3fdae93 --- /dev/null +++ b/arch/x86.c @@ -0,0 +1,383 @@ +/* + * x86.c + * + * Copyright (C) 2006, 2007, 2008 NEC Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ +#ifdef __x86__ + +#include "../print_info.h" +#include "../elf_info.h" +#include "../makedumpfile.h" + +static int max_numnodes; +static unsigned long *remap_start_vaddr; +static unsigned long *remap_end_vaddr; +static unsigned long *remap_start_pfn; + +static int +remap_init(void) +{ + int n; + + if (SYMBOL(node_remap_start_vaddr) == NOT_FOUND_SYMBOL) + return TRUE; + if (SYMBOL(node_remap_end_vaddr) == NOT_FOUND_SYMBOL) + return TRUE; + if (SYMBOL(node_remap_start_pfn) == NOT_FOUND_SYMBOL) + return TRUE; + if (ARRAY_LENGTH(node_remap_start_pfn) == NOT_FOUND_STRUCTURE) + return TRUE; + + n = ARRAY_LENGTH(node_remap_start_pfn); + remap_start_vaddr = calloc(3 * n, sizeof(unsigned long)); + if (!remap_start_vaddr) { + ERRMSG("Can't allocate remap allocator info.\n"); + return FALSE; + } + remap_end_vaddr = remap_start_vaddr + n; + remap_start_pfn = remap_end_vaddr + n; + + if (!readmem(VADDR, SYMBOL(node_remap_start_vaddr), remap_start_vaddr, + n * sizeof(unsigned long))) { + ERRMSG("Can't get node_remap_start_vaddr.\n"); + return FALSE; + } + if (!readmem(VADDR, SYMBOL(node_remap_end_vaddr), remap_end_vaddr, + n * sizeof(unsigned long))) { + ERRMSG("Can't get node_remap_end_vaddr.\n"); + return FALSE; + } + if (!readmem(VADDR, SYMBOL(node_remap_start_pfn), remap_start_pfn, + n * sizeof(unsigned long))) { + ERRMSG("Can't get node_remap_start_pfn.\n"); + return FALSE; + } + + max_numnodes = n; + return TRUE; +} + +int +get_machdep_info_x86(void) +{ + unsigned long vmlist, vmap_area_list, vmalloc_start; + + /* PAE */ + if ((vt.mem_flags & MEMORY_X86_PAE) + || ((SYMBOL(pkmap_count) != NOT_FOUND_SYMBOL) + && (SYMBOL(pkmap_count_next) != NOT_FOUND_SYMBOL) + && ((SYMBOL(pkmap_count_next)-SYMBOL(pkmap_count))/sizeof(int)) + == 512)) { + DEBUG_MSG("\n"); + DEBUG_MSG("PAE : ON\n"); + vt.mem_flags |= MEMORY_X86_PAE; + info->max_physmem_bits = _MAX_PHYSMEM_BITS_PAE; + } else { + DEBUG_MSG("\n"); + DEBUG_MSG("PAE : OFF\n"); + info->max_physmem_bits = _MAX_PHYSMEM_BITS; + } 
+ info->page_offset = __PAGE_OFFSET; + + if (SYMBOL(_stext) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get the symbol of _stext.\n"); + return FALSE; + } + info->kernel_start = SYMBOL(_stext) & ~KVBASE_MASK; + DEBUG_MSG("kernel_start : %lx\n", info->kernel_start); + + if (!remap_init()) + return FALSE; + + /* + * Get vmalloc_start value from either vmap_area_list or vmlist. + */ + if ((SYMBOL(vmap_area_list) != NOT_FOUND_SYMBOL) + && (OFFSET(vmap_area.va_start) != NOT_FOUND_STRUCTURE) + && (OFFSET(vmap_area.list) != NOT_FOUND_STRUCTURE)) { + if (!readmem(VADDR, SYMBOL(vmap_area_list) + OFFSET(list_head.next), + &vmap_area_list, sizeof(vmap_area_list))) { + ERRMSG("Can't get vmap_area_list.\n"); + return FALSE; + } + if (!readmem(VADDR, vmap_area_list - OFFSET(vmap_area.list) + + OFFSET(vmap_area.va_start), &vmalloc_start, + sizeof(vmalloc_start))) { + ERRMSG("Can't get vmalloc_start.\n"); + return FALSE; + } + } else if ((SYMBOL(vmlist) != NOT_FOUND_SYMBOL) + && (OFFSET(vm_struct.addr) != NOT_FOUND_STRUCTURE)) { + if (!readmem(VADDR, SYMBOL(vmlist), &vmlist, sizeof(vmlist))) { + ERRMSG("Can't get vmlist.\n"); + return FALSE; + } + if (!readmem(VADDR, vmlist + OFFSET(vm_struct.addr), &vmalloc_start, + sizeof(vmalloc_start))) { + ERRMSG("Can't get vmalloc_start.\n"); + return FALSE; + } + } else { + /* + * For the compatibility, makedumpfile should run without the symbol + * used to get vmalloc_start value if they are not necessary. + */ + return TRUE; + } + info->vmalloc_start = vmalloc_start; + DEBUG_MSG("vmalloc_start: %lx\n", vmalloc_start); + + return TRUE; +} + +int +get_versiondep_info_x86(void) +{ + /* + * SECTION_SIZE_BITS of PAE has been changed to 29 from 30 since + * linux-2.6.26. 
+ */ + if (vt.mem_flags & MEMORY_X86_PAE) { + if (info->kernel_version < KERNEL_VERSION(2, 6, 26)) + info->section_size_bits = _SECTION_SIZE_BITS_PAE_ORIG; + else + info->section_size_bits = _SECTION_SIZE_BITS_PAE_2_6_26; + } else + info->section_size_bits = _SECTION_SIZE_BITS; + + return TRUE; +} + +unsigned long long +vtop_x86_remap(unsigned long vaddr) +{ + int i; + for (i = 0; i < max_numnodes; ++i) + if (vaddr >= remap_start_vaddr[i] && + vaddr < remap_end_vaddr[i]) + return pfn_to_paddr(remap_start_pfn[i]) + + vaddr - remap_start_vaddr[i]; + return NOT_PADDR; +} + +unsigned long long +vtop_x86_PAE(unsigned long vaddr) +{ + unsigned long long page_dir, pgd_pte, pmd_paddr, pmd_pte; + unsigned long long pte_paddr, pte; + + if (SYMBOL(swapper_pg_dir) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get the symbol of swapper_pg_dir.\n"); + return NOT_PADDR; + } + + page_dir = SYMBOL(swapper_pg_dir); + page_dir += pgd_index_PAE(vaddr) * sizeof(unsigned long long); + if (!readmem(VADDR, page_dir, &pgd_pte, sizeof(pgd_pte))) { + ERRMSG("Can't get pgd_pte (page_dir:%llx).\n", page_dir); + return NOT_PADDR; + } + if (!(pgd_pte & _PAGE_PRESENT)) + return NOT_PADDR; + + if (info->vaddr_for_vtop == vaddr) + MSG(" PGD : %16llx => %16llx\n", page_dir, pgd_pte); + + pmd_paddr = pgd_pte & ENTRY_MASK; + pmd_paddr += pmd_index(vaddr) * sizeof(unsigned long long); + if (!readmem(PADDR, pmd_paddr, &pmd_pte, sizeof(pmd_pte))) { + ERRMSG("Can't get pmd_pte (pmd_paddr:%llx).\n", pmd_paddr); + return NOT_PADDR; + } + if (!(pmd_pte & _PAGE_PRESENT)) + return NOT_PADDR; + + if (info->vaddr_for_vtop == vaddr) + MSG(" PMD : %16llx => %16llx\n", pmd_paddr, pmd_pte); + + if (pmd_pte & _PAGE_PSE) + return (pmd_pte & ENTRY_MASK) + (vaddr & ((1UL << PMD_SHIFT) - 1)); + + pte_paddr = pmd_pte & ENTRY_MASK; + pte_paddr += pte_index(vaddr) * sizeof(unsigned long long); + if (!readmem(PADDR, pte_paddr, &pte, sizeof(pte))) + return NOT_PADDR; + + if (!(pte & _PAGE_PRESENT)) + return NOT_PADDR; + + if 
(info->vaddr_for_vtop == vaddr) + MSG(" PTE : %16llx => %16llx\n", pte_paddr, pte); + + return (pte & ENTRY_MASK) + (vaddr & ((1UL << PTE_SHIFT) - 1)); +} + +int +is_vmalloc_addr_x86(unsigned long vaddr) +{ + return (info->vmalloc_start && vaddr >= info->vmalloc_start); +} + +unsigned long long +vaddr_to_paddr_x86(unsigned long vaddr) +{ + unsigned long long paddr; + + if ((paddr = vtop_x86_remap(vaddr)) != NOT_PADDR) { + if (is_xen_memory()) + paddr = ptom_xen(paddr); + return paddr; + } + + if ((paddr = vaddr_to_paddr_general(vaddr)) != NOT_PADDR) + return paddr; + + if (((SYMBOL(vmap_area_list) == NOT_FOUND_SYMBOL) + || (OFFSET(vmap_area.va_start) == NOT_FOUND_STRUCTURE) + || (OFFSET(vmap_area.list) == NOT_FOUND_STRUCTURE)) + && ((SYMBOL(vmlist) == NOT_FOUND_SYMBOL) + || (OFFSET(vm_struct.addr) == NOT_FOUND_STRUCTURE))) { + ERRMSG("Can't get necessary information for vmalloc translation.\n"); + return NOT_PADDR; + } + if (!is_vmalloc_addr_x86(vaddr)) { + paddr = vaddr - info->kernel_start; + if (is_xen_memory()) + paddr = ptom_xen(paddr); + return paddr; + } + + if (vt.mem_flags & MEMORY_X86_PAE) { + paddr = vtop_x86_PAE(vaddr); + } else { + /* + * TODO: Support vmalloc translation of not-PAE kernel. 
+ */ + ERRMSG("This makedumpfile does not support vmalloc translation of not-PAE kernel.\n"); + return NOT_PADDR; + } + + return paddr; +} + +/* + * for Xen extraction + */ +unsigned long long +kvtop_xen_x86(unsigned long kvaddr) +{ + unsigned long long dirp, entry; + + if (!is_xen_vaddr(kvaddr)) + return NOT_PADDR; + + if (is_direct(kvaddr)) + return (unsigned long)kvaddr - DIRECTMAP_VIRT_START; + + if ((dirp = kvtop_xen_x86(SYMBOL(pgd_l3))) == NOT_PADDR) + return NOT_PADDR; + dirp += pgd_index_PAE(kvaddr) * sizeof(unsigned long long); + if (!readmem(PADDR, dirp, &entry, sizeof(entry))) + return NOT_PADDR; + + if (!(entry & _PAGE_PRESENT)) + return NOT_PADDR; + + dirp = entry & ENTRY_MASK; + dirp += pmd_index(kvaddr) * sizeof(unsigned long long); + if (!readmem(PADDR, dirp, &entry, sizeof(entry))) + return NOT_PADDR; + + if (!(entry & _PAGE_PRESENT)) + return NOT_PADDR; + + if (entry & _PAGE_PSE) { + entry = (entry & ENTRY_MASK) + (kvaddr & ((1UL << PMD_SHIFT) - 1)); + return entry; + } + + dirp = entry & ENTRY_MASK; + dirp += pte_index(kvaddr) * sizeof(unsigned long long); + if (!readmem(PADDR, dirp, &entry, sizeof(entry))) + return NOT_PADDR; + + if (!(entry & _PAGE_PRESENT)) { + return NOT_PADDR; + } + + entry = (entry & ENTRY_MASK) + (kvaddr & ((1UL << PTE_SHIFT) - 1)); + + return entry; +} + +int get_xen_basic_info_x86(void) +{ + if (SYMBOL(pgd_l2) == NOT_FOUND_SYMBOL && + SYMBOL(pgd_l3) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get pgd.\n"); + return FALSE; + } + + if (SYMBOL(pgd_l3) == NOT_FOUND_SYMBOL) { + ERRMSG("non-PAE not support right now.\n"); + return FALSE; + } + + if (SYMBOL(frame_table) != NOT_FOUND_SYMBOL) { + unsigned long frame_table_vaddr; + + if (!readmem(VADDR_XEN, SYMBOL(frame_table), + &frame_table_vaddr, sizeof(frame_table_vaddr))) { + ERRMSG("Can't get the value of frame_table.\n"); + return FALSE; + } + info->frame_table_vaddr = frame_table_vaddr; + } else + info->frame_table_vaddr = FRAMETABLE_VIRT_START; + + if 
(!info->xen_crash_info.com || + info->xen_crash_info.com->xen_major_version < 4) { + unsigned long xen_end; + + if (SYMBOL(xenheap_phys_end) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get the symbol of xenheap_phys_end.\n"); + return FALSE; + } + if (!readmem(VADDR_XEN, SYMBOL(xenheap_phys_end), &xen_end, + sizeof(xen_end))) { + ERRMSG("Can't get the value of xenheap_phys_end.\n"); + return FALSE; + } + info->xen_heap_start = 0; + info->xen_heap_end = paddr_to_pfn(xen_end); + } + + return TRUE; +} + +int get_xen_info_x86(void) +{ + int i; + + /* + * pickled_id == domain addr for x86 + */ + for (i = 0; i < info->num_domain; i++) { + info->domain_list[i].pickled_id = + info->domain_list[i].domain_addr; + } + + return TRUE; +} +#endif /* x86 */ + diff --git a/arch/x86_64.c b/arch/x86_64.c new file mode 100644 index 0000000..2b3c0bb --- /dev/null +++ b/arch/x86_64.c @@ -0,0 +1,907 @@ +/* + * x86_64.c + * + * Copyright (C) 2006, 2007, 2008 NEC Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ +#ifdef __x86_64__ + +#include "../print_info.h" +#include "../elf_info.h" +#include "../makedumpfile.h" +extern struct vmap_pfns *gvmem_pfns; +extern int nr_gvmem_pfns; + +static unsigned long +get_xen_p2m_mfn(void) +{ + if (info->xen_crash_info_v >= 2) + return info->xen_crash_info.v2-> + dom0_pfn_to_mfn_frame_list_list; + if (info->xen_crash_info_v >= 1) + return info->xen_crash_info.v1-> + dom0_pfn_to_mfn_frame_list_list; + return NOT_FOUND_LONG_VALUE; +} + +static int +check_5level_paging(void) +{ + if (NUMBER(pgtable_l5_enabled) != NOT_FOUND_NUMBER && + NUMBER(pgtable_l5_enabled) != 0) + return TRUE; + else + return FALSE; +} + +unsigned long +get_kaslr_offset_x86_64(unsigned long vaddr) +{ + unsigned int i; + char buf[BUFSIZE_FGETS], *endp; + + if (!info->kaslr_offset && info->file_vmcoreinfo) { + if (fseek(info->file_vmcoreinfo, 0, SEEK_SET) < 0) { + ERRMSG("Can't seek the vmcoreinfo file(%s). %s\n", + info->name_vmcoreinfo, strerror(errno)); + return FALSE; + } + + while (fgets(buf, BUFSIZE_FGETS, info->file_vmcoreinfo)) { + i = strlen(buf); + if (!i) + break; + if (buf[i - 1] == '\n') + buf[i - 1] = '\0'; + if (strncmp(buf, STR_KERNELOFFSET, + strlen(STR_KERNELOFFSET)) == 0) + info->kaslr_offset = + strtoul(buf+strlen(STR_KERNELOFFSET),&endp,16); + } + } + if (vaddr >= __START_KERNEL_map && + vaddr < __START_KERNEL_map + info->kaslr_offset) + return info->kaslr_offset; + else + /* + * TODO: we need to check if it is vmalloc/vmmemmap/module + * address, we will have different offset + */ + return 0; +} + +static int +get_page_offset_x86_64(void) +{ + int i; + unsigned long long phys_start; + unsigned long long virt_start; + unsigned long page_offset_base; + + if (info->kaslr_offset && (info->fd_vmlinux != -1)) { + page_offset_base = get_symbol_addr("page_offset_base"); + page_offset_base += info->kaslr_offset; + if (!readmem(VADDR, page_offset_base, &info->page_offset, + sizeof(info->page_offset))) { + ERRMSG("Can't read page_offset_base.\n"); + 
return FALSE; + } + return TRUE; + } + + if (get_num_pt_loads()) { + for (i = 0; + get_pt_load(i, &phys_start, NULL, &virt_start, NULL); + i++) { + if (virt_start != NOT_KV_ADDR + && virt_start < __START_KERNEL_map + && phys_start != NOT_PADDR) { + info->page_offset = virt_start - phys_start; + return TRUE; + } + } + } + + if (info->kernel_version < KERNEL_VERSION(2, 6, 27)) { + info->page_offset = __PAGE_OFFSET_ORIG; + } else if(check_5level_paging()) { + info->page_offset = __PAGE_OFFSET_5LEVEL; + } else { + info->page_offset = __PAGE_OFFSET_2_6_27; + } + + return TRUE; +} + +int +get_phys_base_x86_64(void) +{ + int i; + unsigned long long phys_start; + unsigned long long virt_start; + + /* + * Get the relocatable offset + */ + info->phys_base = 0; /* default/traditional */ + if (NUMBER(phys_base) != NOT_FOUND_NUMBER) { + info->phys_base = NUMBER(phys_base); + return TRUE; + } + + /* linux-2.6.21 or older don't have phys_base, should be set to 0. */ + if (!has_vmcoreinfo()) { + SYMBOL_INIT(phys_base, "phys_base"); + if (SYMBOL(phys_base) == NOT_FOUND_SYMBOL) { + return TRUE; + } + } + + for (i = 0; get_pt_load(i, &phys_start, NULL, &virt_start, NULL); i++) { + if (virt_start >= __START_KERNEL_map + && phys_start != NOT_PADDR) { + + info->phys_base = phys_start - + (virt_start & ~(__START_KERNEL_map)); + + break; + } + } + + return TRUE; +} + +int +get_machdep_info_x86_64(void) +{ + unsigned long p2m_mfn; + int i, j, mfns[MAX_X86_64_FRAMES]; + unsigned long frame_mfn[MAX_X86_64_FRAMES]; + unsigned long buf[MFNS_PER_FRAME]; + + info->section_size_bits = _SECTION_SIZE_BITS; + + if (!is_xen_memory()) + return TRUE; + + /* + * Get the information for translating domain-0's physical + * address into machine address. 
+ */ + p2m_mfn = get_xen_p2m_mfn(); + if (p2m_mfn == (unsigned long)NOT_FOUND_LONG_VALUE) { + ERRMSG("Can't get p2m_mfn address.\n"); + return FALSE; + } + if (!readmem(PADDR, pfn_to_paddr(p2m_mfn), + &frame_mfn, PAGESIZE())) { + ERRMSG("Can't read p2m_mfn.\n"); + return FALSE; + } + + /* + * Count the number of p2m frame. + */ + for (i = 0; i < MAX_X86_64_FRAMES; i++) { + mfns[i] = 0; + if (!frame_mfn[i]) + break; + + if (!readmem(PADDR, pfn_to_paddr(frame_mfn[i]), &buf, + PAGESIZE())) { + ERRMSG("Can't get frame_mfn[%d].\n", i); + return FALSE; + } + for (j = 0; j < MFNS_PER_FRAME; j++) { + if (!buf[j]) + break; + + mfns[i]++; + } + info->p2m_frames += mfns[i]; + } + info->p2m_mfn_frame_list + = malloc(sizeof(unsigned long) * info->p2m_frames); + if (info->p2m_mfn_frame_list == NULL) { + ERRMSG("Can't allocate memory for p2m_mfn_frame_list. %s\n", + strerror(errno)); + return FALSE; + } + + /* + * Get p2m_mfn_frame_list. + */ + for (i = 0; i < MAX_X86_64_FRAMES; i++) { + if (!frame_mfn[i]) + break; + + if (!readmem(PADDR, pfn_to_paddr(frame_mfn[i]), + &info->p2m_mfn_frame_list[i * MFNS_PER_FRAME], + mfns[i] * sizeof(unsigned long))) { + ERRMSG("Can't get p2m_mfn_frame_list.\n"); + return FALSE; + } + if (mfns[i] != MFNS_PER_FRAME) + break; + } + return TRUE; +} + +int +get_versiondep_info_x86_64(void) +{ + /* + * On linux-2.6.26, MAX_PHYSMEM_BITS is changed to 44 from 40. 
+ */ + if (info->kernel_version < KERNEL_VERSION(2, 6, 26)) + info->max_physmem_bits = _MAX_PHYSMEM_BITS_ORIG; + else if (info->kernel_version < KERNEL_VERSION(2, 6, 31)) + info->max_physmem_bits = _MAX_PHYSMEM_BITS_2_6_26; + else if(check_5level_paging()) + info->max_physmem_bits = _MAX_PHYSMEM_BITS_5LEVEL; + else + info->max_physmem_bits = _MAX_PHYSMEM_BITS_2_6_31; + + if (!get_page_offset_x86_64()) + return FALSE; + + if (info->kernel_version < KERNEL_VERSION(2, 6, 31)) { + info->vmemmap_start = VMEMMAP_START_ORIG; + info->vmemmap_end = VMEMMAP_END_ORIG; + } else if(check_5level_paging()) { + info->vmemmap_start = VMEMMAP_START_5LEVEL; + info->vmemmap_end = VMEMMAP_END_5LEVEL; + } else { + info->vmemmap_start = VMEMMAP_START_2_6_31; + info->vmemmap_end = VMEMMAP_END_2_6_31; + } + + return TRUE; +} + +/* + * Translate a virtual address to a physical address by using 4 levels paging. + */ +unsigned long long +__vtop4_x86_64(unsigned long vaddr, unsigned long pagetable) +{ + unsigned long page_dir, pgd, pud_paddr, pud_pte, pmd_paddr, pmd_pte; + unsigned long pte_paddr, pte; + unsigned long p4d_paddr, p4d_pte; + + /* + * Get PGD. + */ + page_dir = pagetable; + if (is_xen_memory()) { + page_dir = ptom_xen(page_dir); + if (page_dir == NOT_PADDR) + return NOT_PADDR; + } + + if (check_5level_paging()) { + page_dir += pgd5_index(vaddr) * sizeof(unsigned long); + if (!readmem(PADDR, page_dir, &pgd, sizeof pgd)) { + ERRMSG("Can't get pgd (page_dir:%lx).\n", page_dir); + return NOT_PADDR; + } + if (info->vaddr_for_vtop == vaddr) + MSG(" PGD : %16lx => %16lx\n", page_dir, pgd); + + if (!(pgd & _PAGE_PRESENT)) { + ERRMSG("Can't get a valid pgd.\n"); + return NOT_PADDR; + } + /* + * Get P4D. 
+ */ + p4d_paddr = pgd & ENTRY_MASK; + p4d_paddr += p4d_index(vaddr) * sizeof(unsigned long); + if (!readmem(PADDR, p4d_paddr, &p4d_pte, sizeof p4d_pte)) { + ERRMSG("Can't get p4d_pte (p4d_paddr:%lx).\n", p4d_paddr); + return NOT_PADDR; + } + if (info->vaddr_for_vtop == vaddr) + MSG(" P4D : %16lx => %16lx\n", p4d_paddr, p4d_pte); + + if (!(p4d_pte & _PAGE_PRESENT)) { + ERRMSG("Can't get a valid p4d_pte.\n"); + return NOT_PADDR; + } + pud_paddr = p4d_pte & ENTRY_MASK; + }else { + page_dir += pgd_index(vaddr) * sizeof(unsigned long); + if (!readmem(PADDR, page_dir, &pgd, sizeof pgd)) { + ERRMSG("Can't get pgd (page_dir:%lx).\n", page_dir); + return NOT_PADDR; + } + if (info->vaddr_for_vtop == vaddr) + MSG(" PGD : %16lx => %16lx\n", page_dir, pgd); + + if (!(pgd & _PAGE_PRESENT)) { + ERRMSG("Can't get a valid pgd.\n"); + return NOT_PADDR; + } + pud_paddr = pgd & ENTRY_MASK; + } + + /* + * Get PUD. + */ + pud_paddr += pud_index(vaddr) * sizeof(unsigned long); + if (!readmem(PADDR, pud_paddr, &pud_pte, sizeof pud_pte)) { + ERRMSG("Can't get pud_pte (pud_paddr:%lx).\n", pud_paddr); + return NOT_PADDR; + } + if (info->vaddr_for_vtop == vaddr) + MSG(" PUD : %16lx => %16lx\n", pud_paddr, pud_pte); + + if (!(pud_pte & _PAGE_PRESENT)) { + ERRMSG("Can't get a valid pud_pte.\n"); + return NOT_PADDR; + } + if (pud_pte & _PAGE_PSE) /* 1GB pages */ + return (pud_pte & ENTRY_MASK & PUD_MASK) + + (vaddr & ~PUD_MASK); + + /* + * Get PMD. 
+ */ + pmd_paddr = pud_pte & ENTRY_MASK; + pmd_paddr += pmd_index(vaddr) * sizeof(unsigned long); + if (!readmem(PADDR, pmd_paddr, &pmd_pte, sizeof pmd_pte)) { + ERRMSG("Can't get pmd_pte (pmd_paddr:%lx).\n", pmd_paddr); + return NOT_PADDR; + } + if (info->vaddr_for_vtop == vaddr) + MSG(" PMD : %16lx => %16lx\n", pmd_paddr, pmd_pte); + + if (!(pmd_pte & _PAGE_PRESENT)) { + ERRMSG("Can't get a valid pmd_pte.\n"); + return NOT_PADDR; + } + if (pmd_pte & _PAGE_PSE) /* 2MB pages */ + return (pmd_pte & ENTRY_MASK & PMD_MASK) + + (vaddr & ~PMD_MASK); + + /* + * Get PTE. + */ + pte_paddr = pmd_pte & ENTRY_MASK; + pte_paddr += pte_index(vaddr) * sizeof(unsigned long); + if (!readmem(PADDR, pte_paddr, &pte, sizeof pte)) { + ERRMSG("Can't get pte (pte_paddr:%lx).\n", pte_paddr); + return NOT_PADDR; + } + if (info->vaddr_for_vtop == vaddr) + MSG(" PTE : %16lx => %16lx\n", pte_paddr, pte); + + if (!(pte & _PAGE_PRESENT)) { + ERRMSG("Can't get a valid pte.\n"); + return NOT_PADDR; + } + return (pte & ENTRY_MASK) + PAGEOFFSET(vaddr); +} + +unsigned long long +vtop4_x86_64(unsigned long vaddr) +{ + unsigned long pagetable; + unsigned long init_level4_pgt; + + if (SYMBOL(init_level4_pgt) != NOT_FOUND_SYMBOL) + init_level4_pgt = SYMBOL(init_level4_pgt); + else if (SYMBOL(init_top_pgt) != NOT_FOUND_SYMBOL) + init_level4_pgt = SYMBOL(init_top_pgt); + else { + ERRMSG("Can't get the symbol of init_level4_pgt/init_top_pgt.\n"); + return NOT_PADDR; + } + + pagetable = init_level4_pgt - __START_KERNEL_map + info->phys_base; + + return __vtop4_x86_64(vaddr, pagetable); +} + +unsigned long long +vtop4_x86_64_pagetable(unsigned long vaddr, unsigned long pagetable) +{ + return __vtop4_x86_64(vaddr, pagetable); +} + +/* + * for Xen extraction + */ +unsigned long long +kvtop_xen_x86_64(unsigned long kvaddr) +{ + unsigned long long dirp, entry; + + if (!is_xen_vaddr(kvaddr)) + return NOT_PADDR; + + if (is_xen_text(kvaddr)) + return (unsigned long)kvaddr - XEN_VIRT_START + info->xen_phys_start; + 
+ if (is_direct(kvaddr)) + return (unsigned long)kvaddr - DIRECTMAP_VIRT_START; + + if ((dirp = kvtop_xen_x86_64(SYMBOL(pgd_l4))) == NOT_PADDR) + return NOT_PADDR; + + /* + * Get PGD. + */ + dirp += pgd_index(kvaddr) * sizeof(unsigned long long); + if (!readmem(PADDR, dirp, &entry, sizeof(entry))) + return NOT_PADDR; + + if (!(entry & _PAGE_PRESENT)) + return NOT_PADDR; + + /* + * Get PUD. + */ + dirp = entry & ENTRY_MASK; + dirp += pud_index(kvaddr) * sizeof(unsigned long long); + if (!readmem(PADDR, dirp, &entry, sizeof(entry))) + return NOT_PADDR; + + if (!(entry & _PAGE_PRESENT)) + return NOT_PADDR; + + if (entry & _PAGE_PSE) /* 1GB pages */ + return (entry & ENTRY_MASK & PUD_MASK) + + (kvaddr & ~PUD_MASK); + + /* + * Get PMD. + */ + dirp = entry & ENTRY_MASK; + dirp += pmd_index(kvaddr) * sizeof(unsigned long long); + if (!readmem(PADDR, dirp, &entry, sizeof(entry))) + return NOT_PADDR; + + if (!(entry & _PAGE_PRESENT)) + return NOT_PADDR; + + if (entry & _PAGE_PSE) /* 2MB pages */ + return (entry & ENTRY_MASK & PMD_MASK) + + (kvaddr & ~PMD_MASK); + + /* + * Get PTE. + */ + dirp = entry & ENTRY_MASK; + dirp += pte_index(kvaddr) * sizeof(unsigned long long); + if (!readmem(PADDR, dirp, &entry, sizeof(entry))) + return NOT_PADDR; + + if (!(entry & _PAGE_PRESENT)) { + return NOT_PADDR; + } + + return (entry & ENTRY_MASK) + PAGEOFFSET(kvaddr); +} + +int get_xen_basic_info_x86_64(void) +{ + if (!info->xen_phys_start) { + if (info->xen_crash_info_v < 2) { + ERRMSG("Can't get Xen physical start address.\n" + "Please use the --xen_phys_start option."); + return FALSE; + } + info->xen_phys_start = info->xen_crash_info.v2->xen_phys_start; + } + + info->xen_virt_start = SYMBOL(domain_list); + + /* + * Xen virtual mapping is aligned to 1 GiB boundary. + * domain_list lives in bss which sits no more than + * 1 GiB below beginning of virtual address space. 
+ */ + info->xen_virt_start &= 0xffffffffc0000000; + + if (info->xen_crash_info.com && + info->xen_crash_info.com->xen_major_version >= 4) + info->directmap_virt_end = DIRECTMAP_VIRT_END_V4; + else + info->directmap_virt_end = DIRECTMAP_VIRT_END_V3; + + if (SYMBOL(pgd_l4) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get pml4.\n"); + return FALSE; + } + + if (SYMBOL(frame_table) != NOT_FOUND_SYMBOL) { + unsigned long frame_table_vaddr; + + if (!readmem(VADDR_XEN, SYMBOL(frame_table), + &frame_table_vaddr, sizeof(frame_table_vaddr))) { + ERRMSG("Can't get the value of frame_table.\n"); + return FALSE; + } + info->frame_table_vaddr = frame_table_vaddr; + } else { + if (info->xen_crash_info.com && + ((info->xen_crash_info.com->xen_major_version == 4 && + info->xen_crash_info.com->xen_minor_version >= 3) || + info->xen_crash_info.com->xen_major_version > 4)) + info->frame_table_vaddr = FRAMETABLE_VIRT_START_V4_3; + else + info->frame_table_vaddr = FRAMETABLE_VIRT_START_V3; + } + + if (!info->xen_crash_info.com || + info->xen_crash_info.com->xen_major_version < 4) { + unsigned long xen_end; + + if (SYMBOL(xenheap_phys_end) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get the symbol of xenheap_phys_end.\n"); + return FALSE; + } + if (!readmem(VADDR_XEN, SYMBOL(xenheap_phys_end), &xen_end, + sizeof(xen_end))) { + ERRMSG("Can't get the value of xenheap_phys_end.\n"); + return FALSE; + } + info->xen_heap_start = 0; + info->xen_heap_end = paddr_to_pfn(xen_end); + } + + return TRUE; +} + +int get_xen_info_x86_64(void) +{ + int i; + + if (info->xen_crash_info.com && + (info->xen_crash_info.com->xen_major_version >= 4 || + (info->xen_crash_info.com->xen_major_version == 3 && + info->xen_crash_info.com->xen_minor_version >= 4))) { + /* + * cf. 
/*
 * Scan the kernel page table for the pfn's of the page structs
 * Place them in array gvmem_pfns[nr_gvmem_pfns]
 *
 * Walks the kernel's vmemmap mapping (pgd -> pud -> pmd, 2MB huge
 * pages) starting at VMEMMAP_START and records, for each physically
 * contiguous run of pages holding struct page entries, a descriptor
 * (vmap pfn range -> represented pfn range).  Descriptors are first
 * collected on a circular doubly-linked list, then copied into the
 * global array gvmem_pfns[] with the count in nr_gvmem_pfns.
 *
 * Returns COMPLETED on success, FAILED on any read/symbol error.
 */
int
find_vmemmap_x86_64()
{
	int i;
	int pgd_index;
	int start_range = 1;
	int num_pmds=0, num_pmds_valid=0;
	int break_in_valids, break_after_invalids;
	int do_break;
	int last_valid=0, last_invalid=0;
	int pagestructsize, structsperhpage, hugepagesize;
	long page_structs_per_pud;
	long num_puds, groups = 0;
	long pgdindex, pudindex, pmdindex;
	long vaddr_base;
	long rep_pfn_start = 0, rep_pfn_end = 0;
	unsigned long init_level4_pgt;
	unsigned long max_paddr, high_pfn;
	unsigned long pgd_addr, pud_addr, pmd_addr;
	unsigned long *pgdp, *pudp, *pmdp;
	unsigned long pud_page[PTRS_PER_PUD];
	unsigned long pmd_page[PTRS_PER_PMD];
	unsigned long vmap_offset_start = 0, vmap_offset_end = 0;
	unsigned long pmd, tpfn;
	unsigned long pvaddr = 0;
	unsigned long data_addr = 0, last_data_addr = 0, start_data_addr = 0;
	/*
	 * data_addr is the paddr of the page holding the page structs.
	 * We keep lists of contiguous pages and the pfn's that their
	 * page structs represent.
	 * start_data_addr and last_data_addr mark start/end of those
	 * contiguous areas.
	 * An area descriptor is vmap start/end pfn and rep start/end
	 * of the pfn's represented by the vmap start/end.
	 */
	struct vmap_pfns *vmapp, *vmaphead = NULL, *cur, *tail;

	/* init_top_pgt is the post-4.13 name for init_level4_pgt */
	init_level4_pgt = SYMBOL(init_level4_pgt);
	if (init_level4_pgt == NOT_FOUND_SYMBOL)
		init_level4_pgt = SYMBOL(init_top_pgt);

	if (init_level4_pgt == NOT_FOUND_SYMBOL) {
		ERRMSG("init_level4_pgt/init_top_pgt not found\n");
		return FAILED;
	}

	pagestructsize = size_table.page;
	hugepagesize = PTRS_PER_PMD * info->page_size;
	vaddr_base = info->vmemmap_start;
	max_paddr = get_max_paddr();
	/*
	 * the page structures are mapped at VMEMMAP_START (info->vmemmap_start)
	 * for max_paddr >> 12 page structures
	 * NOTE(review): literal 12 here presumably means PAGESHIFT() --
	 * confirm; other shifts in this function use PAGESHIFT()/PTE_SHIFT.
	 */
	high_pfn = max_paddr >> 12;
	pgd_index = pgd_index(vaddr_base);
	pgd_addr = vaddr_to_paddr(init_level4_pgt); /* address of pgd */
	pgd_addr += pgd_index * sizeof(unsigned long);
	page_structs_per_pud = (PTRS_PER_PUD * PTRS_PER_PMD * info->page_size) /
					pagestructsize;
	num_puds = (high_pfn + page_structs_per_pud - 1) / page_structs_per_pud;
	pvaddr = VMEMMAP_START;
	structsperhpage = hugepagesize / pagestructsize;

	/* outer loop is for pud entries in the pgd */
	for (pgdindex = 0, pgdp = (unsigned long *)pgd_addr; pgdindex < num_puds;
							pgdindex++, pgdp++) {

		/* read the pgd one word at a time, into pud_addr */
		if (!readmem(PADDR, (unsigned long long)pgdp, (void *)&pud_addr,
					sizeof(unsigned long))) {
			/* NOTE(review): reports the fixed pgd_index, not the
			 * loop counter pgdindex -- slot number may mislead */
			ERRMSG("Can't get pgd entry for slot %d.\n", pgd_index);
			return FAILED;
		}

		/* mask the pgd entry for the address of the pud page */
		pud_addr &= PMASK;
		if (pud_addr == 0)
			continue;
		/* read the entire pud page */
		if (!readmem(PADDR, (unsigned long long)pud_addr, (void *)pud_page,
					PTRS_PER_PUD * sizeof(unsigned long))) {
			ERRMSG("Can't get pud entry for pgd slot %ld.\n", pgdindex);
			return FAILED;
		}
		/* step thru each pmd address in the pud page */
		/* pudp points to an entry in the pud page */
		for (pudp = (unsigned long *)pud_page, pudindex = 0;
					pudindex < PTRS_PER_PUD; pudindex++, pudp++) {
			pmd_addr = *pudp & PMASK;
			/* read the entire pmd page */
			if (pmd_addr == 0)
				continue;
			if (!readmem(PADDR, pmd_addr, (void *)pmd_page,
					PTRS_PER_PMD * sizeof(unsigned long))) {
				ERRMSG("Can't get pud entry for slot %ld.\n", pudindex);
				return FAILED;
			}
			/* pmdp points to an entry in the pmd */
			for (pmdp = (unsigned long *)pmd_page, pmdindex = 0;
					pmdindex < PTRS_PER_PMD; pmdindex++, pmdp++) {
				/* linear page position in this page table: */
				pmd = *pmdp;
				num_pmds++;
				tpfn = (pvaddr - VMEMMAP_START) /
							pagestructsize;
				if (tpfn >= high_pfn) {
					break;
				}
				/*
				 * vmap_offset_start:
				 * Starting logical position in the
				 * vmemmap array for the group stays
				 * constant until a hole in the table
				 * or a break in contiguousness.
				 */

				/*
				 * Ending logical position in the
				 * vmemmap array:
				 */
				vmap_offset_end += hugepagesize;
				do_break = 0;
				break_in_valids = 0;
				break_after_invalids = 0;
				/*
				 * We want breaks either when:
				 * - we hit a hole (invalid)
				 * - we discontiguous page is a string of valids
				 */
				if (pmd) {
					data_addr = (pmd & PMASK);
					if (start_range) {
						/* first-time kludge */
						start_data_addr = data_addr;
						last_data_addr = start_data_addr
							 - hugepagesize;
						start_range = 0;
					}
					if (last_invalid) {
						/* end of a hole */
						start_data_addr = data_addr;
						last_data_addr = start_data_addr
							 - hugepagesize;
						/* trigger update of offset */
						do_break = 1;
					}
					last_valid = 1;
					last_invalid = 0;
					/*
					 * we have a gap in physical
					 * contiguousness in the table.
					 */
					/* ?? consecutive holes will have
					   same data_addr */
					if (data_addr !=
						last_data_addr + hugepagesize) {
						do_break = 1;
						break_in_valids = 1;
					}
					DEBUG_MSG("valid: pud %ld pmd %ld pfn %#lx"
						" pvaddr %#lx pfns %#lx-%lx"
						" start %#lx end %#lx\n",
						pudindex, pmdindex,
						data_addr >> 12,
						pvaddr, tpfn,
						tpfn + structsperhpage - 1,
						vmap_offset_start,
						vmap_offset_end);
					num_pmds_valid++;
					if (!(pmd & _PAGE_PSE)) {
						printf("vmemmap pmd not huge, abort\n");
						return FAILED;
					}
				} else {
					if (last_valid) {
						/* this a hole after some valids */
						do_break = 1;
						break_in_valids = 1;
						break_after_invalids = 0;
					}
					last_valid = 0;
					last_invalid = 1;
					/*
					 * There are holes in this sparsely
					 * populated table; they are 2MB gaps
					 * represented by null pmd entries.
					 */
					DEBUG_MSG("invalid: pud %ld pmd %ld %#lx"
						" pfns %#lx-%lx start %#lx end"
						" %#lx\n", pudindex, pmdindex,
						pvaddr, tpfn,
						tpfn + structsperhpage - 1,
						vmap_offset_start,
						vmap_offset_end);
				}
				if (do_break) {
					/* The end of a hole is not summarized.
					 * It must be the start of a hole or
					 * hitting a discontiguous series.
					 */
					if (break_in_valids || break_after_invalids) {
						/*
						 * calculate that pfns
						 * represented by the current
						 * offset in the vmemmap.
						 */
						/* page struct even partly on this page */
						rep_pfn_start = vmap_offset_start /
							pagestructsize;
						/* ending page struct entirely on
						   this page */
						rep_pfn_end = ((vmap_offset_end -
							hugepagesize) / pagestructsize);
						DEBUG_MSG("vmap pfns %#lx-%lx "
							"represent pfns %#lx-%lx\n\n",
							start_data_addr >> PAGESHIFT(),
							last_data_addr >> PAGESHIFT(),
							rep_pfn_start, rep_pfn_end);
						groups++;
						/* NOTE(review): malloc result is
						 * not checked before use */
						vmapp = (struct vmap_pfns *)malloc(
								sizeof(struct vmap_pfns));
						/* pfn of this 2MB page of page structs */
						vmapp->vmap_pfn_start = start_data_addr
									>> PTE_SHIFT;
						vmapp->vmap_pfn_end = last_data_addr
									>> PTE_SHIFT;
						/* these (start/end) are literal pfns
						 * on this page, not start and end+1 */
						vmapp->rep_pfn_start = rep_pfn_start;
						vmapp->rep_pfn_end = rep_pfn_end;

						/* insert at tail of the circular
						 * doubly-linked list */
						if (!vmaphead) {
							vmaphead = vmapp;
							vmapp->next = vmapp;
							vmapp->prev = vmapp;
						} else {
							tail = vmaphead->prev;
							vmaphead->prev = vmapp;
							tail->next = vmapp;
							vmapp->next = vmaphead;
							vmapp->prev = tail;
						}
					}

					/* update logical position at every break */
					vmap_offset_start =
						vmap_offset_end - hugepagesize;
					start_data_addr = data_addr;
				}

				last_data_addr = data_addr;
				pvaddr += hugepagesize;
				/*
				 * pvaddr is current virtual address
				 *   eg 0xffffea0004200000 if
				 *    vmap_offset_start is 4200000
				 */
			}
		}
		tpfn = (pvaddr - VMEMMAP_START) / pagestructsize;
		if (tpfn >= high_pfn) {
			break;
		}
	}
	/* flush the last (still open) group of page-struct pages */
	rep_pfn_start = vmap_offset_start / pagestructsize;
	rep_pfn_end = (vmap_offset_end - hugepagesize) / pagestructsize;
	DEBUG_MSG("vmap pfns %#lx-%lx represent pfns %#lx-%lx\n\n",
		start_data_addr >> PAGESHIFT(), last_data_addr >> PAGESHIFT(),
		rep_pfn_start, rep_pfn_end);
	groups++;
	/* NOTE(review): malloc result is not checked before use */
	vmapp = (struct vmap_pfns *)malloc(sizeof(struct vmap_pfns));
	vmapp->vmap_pfn_start = start_data_addr >> PTE_SHIFT;
	vmapp->vmap_pfn_end = last_data_addr >> PTE_SHIFT;
	vmapp->rep_pfn_start = rep_pfn_start;
	vmapp->rep_pfn_end = rep_pfn_end;
	if (!vmaphead) {
		vmaphead = vmapp;
		vmapp->next = vmapp;
		vmapp->prev = vmapp;
	} else {
		tail = vmaphead->prev;
		vmaphead->prev = vmapp;
		tail->next = vmapp;
		vmapp->next = vmaphead;
		vmapp->prev = tail;
	}
	DEBUG_MSG("num_pmds: %d num_pmds_valid %d\n", num_pmds, num_pmds_valid);

	/* transfer the linked list to an array */
	cur = vmaphead;
	/* NOTE(review): malloc result is not checked before use */
	gvmem_pfns = (struct vmap_pfns *)malloc(sizeof(struct vmap_pfns) * groups);
	i = 0;
	do {
		vmapp = gvmem_pfns + i;
		vmapp->vmap_pfn_start = cur->vmap_pfn_start;
		vmapp->vmap_pfn_end = cur->vmap_pfn_end;
		vmapp->rep_pfn_start = cur->rep_pfn_start;
		vmapp->rep_pfn_end = cur->rep_pfn_end;
		cur = cur->next;
		free(cur->prev);	/* list node copied; release it */
		i++;
	} while (cur != vmaphead);
	nr_gvmem_pfns = i;
	return COMPLETED;
}

#endif /* x86_64 */
+ */ + +#include "makedumpfile.h" +#include "cache.h" +#include "print_info.h" + +struct cache { + struct cache_entry *head, *tail; +}; + +/* 8 pages covers 4-level paging plus 4 data pages */ +#define CACHE_SIZE 8 +static struct cache_entry entries[CACHE_SIZE]; +static struct cache_entry *pool[CACHE_SIZE]; +static int avail = CACHE_SIZE; + +static struct cache used, pending; + +static void *cachebuf; + +int +cache_init(void) +{ + int i; + + cachebuf = malloc(info->page_size * CACHE_SIZE); + if (cachebuf == NULL) { + ERRMSG("Can't allocate memory for cache. %s\n", + strerror(errno)); + return FALSE; + } + + for (i = 0; i < CACHE_SIZE; ++i) + pool[i] = &entries[i]; + + return TRUE; +} + +static void +add_entry(struct cache *cache, struct cache_entry *entry) +{ + entry->next = cache->head; + entry->prev = NULL; + if (cache->head) + cache->head->prev = entry; + cache->head = entry; + if (!cache->tail) + cache->tail = entry; +} + +static void +remove_entry(struct cache *cache, struct cache_entry *entry) +{ + if (entry->next) + entry->next->prev = entry->prev; + else + cache->tail = entry->prev; + + if (entry->prev) + entry->prev->next = entry->next; + else + cache->head = entry->next; +} + +void * +cache_search(unsigned long long paddr, unsigned long length) +{ + struct cache_entry *entry; + for (entry = used.head; entry; entry = entry->next) { + size_t off = paddr - entry->paddr; + if (off < entry->buflen && + length <= entry->buflen - off) { + if (entry != used.head) { + remove_entry(&used, entry); + add_entry(&used, entry); + } + return entry->bufptr + off; + } + } + + return NULL; /* cache miss */ +} + +struct cache_entry * +cache_alloc(unsigned long long paddr) +{ + struct cache_entry *entry = NULL; + int idx; + + if (avail) { + entry = pool[--avail]; + } else if (used.tail) { + entry = used.tail; + remove_entry(&used, entry); + if (entry->discard) + entry->discard(entry); + } else + return NULL; + + idx = entry - entries; + entry->paddr = paddr; + entry->bufptr = 
cachebuf + idx * info->page_size; + entry->buflen = info->page_size; + entry->discard = NULL; + add_entry(&pending, entry); + + return entry; +} + +void +cache_add(struct cache_entry *entry) +{ + remove_entry(&pending, entry); + add_entry(&used, entry); +} + +void +cache_free(struct cache_entry *entry) +{ + remove_entry(&pending, entry); + pool[avail++] = entry; +} @@ -0,0 +1,37 @@ +/* + * cache.h + * + * Written by: Petr Tesarik <ptesarik@suse.cz> + * + * Copyright (c) 2012 SUSE LINUX Products GmbH, Nuernberg, Germany. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _CACHE_H +#define _CACHE_H + +struct cache_entry { + unsigned long long paddr; + void *bufptr; + unsigned long buflen; + struct cache_entry *next, *prev; + + void (*discard)(struct cache_entry *); +}; + +int cache_init(void); +void *cache_search(unsigned long long paddr, unsigned long length); +struct cache_entry *cache_alloc(unsigned long long paddr); +void cache_add(struct cache_entry *entry); +void cache_free(struct cache_entry *entry); + +#endif /* _CACHE_H */ diff --git a/common.h b/common.h new file mode 100644 index 0000000..6e2f657 --- /dev/null +++ b/common.h @@ -0,0 +1,54 @@ +/* + * common.h + * + * Copyright (C) 2011 NEC Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#ifndef _COMMON_H +#define _COMMON_H + +#define TRUE (1) +#define FALSE (0) +#define ERROR (-1) +#define UNUSED (-1) +#define RETURN_ON_ERROR (0x2) + +#ifndef LONG_MAX +#define LONG_MAX ((long)(~0UL>>1)) +#endif +#ifndef ULONG_MAX +#define ULONG_MAX (~0UL) +#endif +#define ULONGLONG_MAX (~0ULL) + +#define MAX(a,b) ((a) > (b) ? (a) : (b)) +#define MIN(a,b) ((a) < (b) ? (a) : (b)) + +#define divideup(x, y) (((x) + ((y) - 1)) / (y)) +#define round(x, y) (((x) / (y)) * (y)) +#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) + +#define NUM_HEX (0x1) +#define NUM_DEC (0x2) +#define NUM_EXPR (0x4) +#define NUM_ANY (NUM_HEX|NUM_DEC|NUM_EXPR) + +/* + * Incorrect address + */ +#define NOT_MEMMAP_ADDR (0x0) +#define NOT_KV_ADDR (0x0) +#define NOT_PADDR (ULONGLONG_MAX) +#define BADADDR ((ulong)(-1)) + +#endif /* COMMON_H */ + diff --git a/diskdump_mod.h b/diskdump_mod.h new file mode 100644 index 0000000..2676817 --- /dev/null +++ b/diskdump_mod.h @@ -0,0 +1,115 @@ +/* + * diskdump.h + * + * Copyright (C) 2004, 2005 David Anderson + * Copyright (C) 2004, 2005 Red Hat, Inc. All rights reserved. + * Copyright (C) 2005 FUJITSU LIMITED + * Copyright (C) 2005 NEC Corporation + * + * This software may be freely redistributed under the terms of the + * GNU General Public License. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
#ifndef _DISKDUMP_MOD_H
#define _DISKDUMP_MOD_H

#include <elf.h>

/* On-disk magic strings identifying the dump format. */
#define DUMP_PARTITION_SIGNATURE	"diskdump"
#define DISK_DUMP_SIGNATURE		"DISKDUMP"
#define KDUMP_SIGNATURE			"KDUMP   "
/* length of the signature field, excluding the terminating NUL */
#define SIG_LEN (sizeof(DUMP_PARTITION_SIGNATURE) - 1)
#define DISKDUMP_HEADER_BLOCKS		(1)

/*
 * These are all remnants of the old "diskdump" facility,
 * none of them are ever used by makedumpfile.
 */
#define DUMP_HEADER_COMPLETED	0
#define DUMP_HEADER_INCOMPLETED 1
#define DUMP_HEADER_COMPRESSED  8

/* Mirror of the kernel's struct new_utsname (fixed 65-byte fields). */
struct new_utsname {
	char sysname[65];
	char nodename[65];
	char release[65];
	char version[65];
	char machine[65];
	char domainname[65];
};

/* Common header at the start of a diskdump/kdump-compressed file. */
struct disk_dump_header {
	char			signature[SIG_LEN];	/* = "KDUMP   " */
	int			header_version; /* Dump header version */
	struct new_utsname	utsname;	/* copy of system_utsname */
	struct timeval		timestamp;	/* Time stamp */
	unsigned int		status; 	/* Above flags */
	int			block_size;	/* Size of a block in byte */
	int			sub_hdr_size;	/* Size of arch dependent
						   header in blocks */
	unsigned int		bitmap_blocks;	/* Size of Memory bitmap in
						   block */
	unsigned int		max_mapnr;	/* = max_mapnr, OBSOLETE!
						   32bit only, full 64bit
						   in sub header. */
	unsigned int		total_ram_blocks;/* Number of blocks should be
						   written */
	unsigned int		device_blocks;	/* Number of total blocks in
						 * the dump device */
	unsigned int		written_blocks; /* Number of written blocks */
	unsigned int		current_cpu;	/* CPU# which handles dump */
	int			nr_cpus;	/* Number of CPUs */
	/* zero-length trailing array, mirrors the kernel layout; never
	 * dereferenced by makedumpfile */
	struct task_struct	*tasks[0];
};

/*
 * Sub header for KDUMP
 * But Common header of KDUMP is disk_dump_header of diskdump.
 *
 * Fields are only valid when header_version (above) is at least the
 * version noted in each comment; readers must check before use.
 */
struct kdump_sub_header {
	unsigned long	phys_base;
	int		dump_level;	/* header_version 1 and later */
	int		split;		/* header_version 2 and later */
	unsigned long	start_pfn;	/* header_version 2 and later,
					   OBSOLETE! 32bit only, full
					   64bit in start_pfn_64. */
	unsigned long	end_pfn;	/* header_version 2 and later,
					   OBSOLETE! 32bit only, full
					   64bit in end_pfn_64. */
	off_t		offset_vmcoreinfo;/* header_version 3 and later */
	unsigned long	size_vmcoreinfo;  /* header_version 3 and later */
	off_t		offset_note;      /* header_version 4 and later */
	unsigned long	size_note;        /* header_version 4 and later */
	off_t		offset_eraseinfo; /* header_version 5 and later */
	unsigned long	size_eraseinfo;   /* header_version 5 and later */
	unsigned long long start_pfn_64;  /* header_version 6 and later */
	unsigned long long end_pfn_64;	  /* header_version 6 and later */
	unsigned long long max_mapnr_64;  /* header_version 6 and later */
};

/* page flags */
#define DUMP_DH_COMPRESSED_ZLIB	0x1	/* page is compressed with zlib */
#define DUMP_DH_COMPRESSED_LZO	0x2	/* paged is compressed with lzo */
#define DUMP_DH_COMPRESSED_SNAPPY	0x4
					/* paged is compressed with snappy */
#define DUMP_DH_COMPRESSED_INCOMPLETE	0x8
					/* indicate an incomplete dumpfile */
#define DUMP_DH_EXCLUDED_VMEMMAP 0x10	/* unused vmemmap pages are excluded */

/* descriptor of each page for vmcore */
typedef struct page_desc {
	off_t			offset;		/* the offset of the page data*/
	unsigned int		size;		/* the size of this dump page */
	unsigned int		flags;		/* flags */
	unsigned long long	page_flags;	/* page flags */
} page_desc_t;

#define DISKDUMP_CACHED_PAGES	(16)
#define PAGE_VALID		(0x1)	/* flags */
#define DISKDUMP_VALID_PAGE(flags)	((flags) & PAGE_VALID)

#endif  /* _DISKDUMP_MOD_H */
#include <elfutils/libdw.h>
#include <elfutils/libdwfl.h>
#include <dwarf.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>

#include "common.h"
#include "print_info.h"
#include "dwarf_info.h"

/*
 * Debugging information
 */
#define DEFAULT_DEBUGINFO_PATH	"/usr/lib/debug"

/*
 * Single module-global request/response record for all DWARF queries:
 * callers fill the IN fields (cmd + the names relevant to that cmd),
 * the search machinery below fills the OUT fields.  Not thread-safe.
 */
struct dwarf_info {
	unsigned int	cmd;		/* IN */
	int	fd_debuginfo;		/* IN */
	char	*name_debuginfo;	/* IN */
	char	*module_name;		/* IN */
	char	*struct_name;		/* IN */
	char	*symbol_name;		/* IN */
	char	*member_name;		/* IN */
	char	*enum_name;		/* IN */
	Elf	*elfd;			/* OUT */
	Dwarf	*dwarfd;		/* OUT */
	Dwfl	*dwfl;			/* OUT */
	char	*type_name;		/* OUT */
	long	struct_size;		/* OUT */
	long	member_offset;		/* OUT */
	long	array_length;		/* OUT */
	long	enum_number;		/* OUT */
	unsigned char	type_flag;	/* OUT */
	char	src_name[LEN_SRCFILE];	/* OUT */
	Dwarf_Off die_offset;		/* OUT */
};
static struct dwarf_info	dwarf_info = {
	.fd_debuginfo = -1,
};


/*
 * Internal functions.
 *
 * The is_search_*() predicates classify a DWARF_INFO_* command so the
 * DIE-tree walker can dispatch to the matching search routine.
 */

/* TRUE if cmd queries a structure (size, member offset/type/length). */
static int
is_search_structure(int cmd)
{
	if ((cmd == DWARF_INFO_GET_STRUCT_SIZE)
	    || (cmd == DWARF_INFO_GET_MEMBER_OFFSET)
	    || (cmd == DWARF_INFO_GET_MEMBER_TYPE)
	    || (cmd == DWARF_INFO_GET_MEMBER_OFFSET_1ST_UNION)
	    || (cmd == DWARF_INFO_GET_MEMBER_ARRAY_LENGTH))
		return TRUE;
	else
		return FALSE;
}

/* TRUE if cmd queries an enumerator's numeric value. */
static int
is_search_number(int cmd)
{
	if (cmd == DWARF_INFO_GET_ENUM_NUMBER)
		return TRUE;
	else
		return FALSE;
}

/* TRUE if cmd queries a variable symbol (array length or type). */
static int
is_search_symbol(int cmd)
{
	if ((cmd == DWARF_INFO_GET_SYMBOL_ARRAY_LENGTH)
	    || (cmd == DWARF_INFO_GET_SYMBOL_TYPE)
	    || (cmd == DWARF_INFO_CHECK_SYMBOL_ARRAY_TYPE))
		return TRUE;
	else
		return FALSE;
}

/* TRUE if cmd queries a typedef (size or declaring source file). */
static int
is_search_typedef(int cmd)
{
	if ((cmd == DWARF_INFO_GET_TYPEDEF_SIZE)
	    || (cmd == DWARF_INFO_GET_TYPEDEF_SRCNAME))
		return TRUE;
	else
		return FALSE;
}

/* TRUE if cmd queries a type domain (struct/union/enum/... DIE). */
static int
is_search_domain(int cmd)
{
	if ((cmd == DWARF_INFO_GET_DOMAIN_STRUCT)
	    || (cmd == DWARF_INFO_GET_DOMAIN_TYPEDEF)
	    || (cmd == DWARF_INFO_GET_DOMAIN_ARRAY)
	    || (cmd == DWARF_INFO_GET_DOMAIN_UNION)
	    || (cmd == DWARF_INFO_GET_DOMAIN_ENUM)
	    || (cmd == DWARF_INFO_GET_DOMAIN_REF)
	    || (cmd == DWARF_INFO_GET_DOMAIN_STRING)
	    || (cmd == DWARF_INFO_GET_DOMAIN_BASE))
		return TRUE;
	else
		return FALSE;
}

/* TRUE if cmd asks for a DIE offset by name. */
static int
is_search_die(int cmd)
{
	if (cmd == DWARF_INFO_GET_DIE)
		return TRUE;
	else
		return FALSE;
}

/*
 * dwfl_getmodules() callback: capture the Dwarf and Elf handles of the
 * reported module into dwarf_info, and remember the resolved debuginfo
 * path for the requested module so later opens can skip the search.
 */
static int
process_module (Dwfl_Module *dwflmod,
		void **userdata __attribute__ ((unused)),
		const char *name __attribute__ ((unused)),
		Dwarf_Addr base __attribute__ ((unused)),
		void *arg)
{
	const char *fname, *mod_name, *debugfile;
	Dwarf_Addr dwbias;

	/* get a debug context descriptor.*/
	dwarf_info.dwarfd = dwfl_module_getdwarf (dwflmod, &dwbias);
	if (dwarf_info.dwarfd == NULL) {
		ERRMSG("dwfl_module_getdwarf error.\n");
		return DWARF_CB_ABORT;
	}
	dwarf_info.elfd = dwarf_getelf(dwarf_info.dwarfd);

	mod_name = dwfl_module_info(dwflmod, NULL, NULL, NULL, NULL, NULL,
					&fname, &debugfile);

	if (!strcmp(dwarf_info.module_name, mod_name) &&
		!dwarf_info.name_debuginfo && debugfile) {
		/*
		 * Store the debuginfo filename. Next time we will
		 * open debuginfo file direclty instead of searching
		 * for it again.
		 */
		dwarf_info.name_debuginfo = strdup(debugfile);
	}

	return DWARF_CB_OK;
}
/*
 * dwfl_linux_kernel_report_offline() predicate: accept only the module
 * whose debuginfo we are searching for (dwarf_info.module_name).
 */
static int
dwfl_report_module_p(const char *modname, const char *filename)
{
	if (filename && !strcmp(modname, dwarf_info.module_name))
		return 1;
	return 0;
}

/* Tear down the current dwfl session and drop the cached handles. */
static void
clean_dwfl_info(void)
{
	if (dwarf_info.dwfl)
		dwfl_end(dwarf_info.dwfl);

	dwarf_info.dwfl = NULL;
	dwarf_info.dwarfd = NULL;
	dwarf_info.elfd = NULL;
}

/*
 * Search module debuginfo.
 * This function searches for module debuginfo in default debuginfo path for
 * a given module in dwarf_info.module_name.
 *
 * On success, dwarf_info.name_debuginfo is set to absolute path of
 * module debuginfo.
 */
static int
search_module_debuginfo(char *os_release)
{
	Dwfl *dwfl = NULL;
	static char *debuginfo_path = DEFAULT_DEBUGINFO_PATH;
	static const Dwfl_Callbacks callbacks = {
		.section_address = dwfl_offline_section_address,
		.find_debuginfo = dwfl_standard_find_debuginfo,
		.debuginfo_path = &debuginfo_path,
	};

	/*
	 * Check if We already have debuginfo file name with us. If yes,
	 * then we don't need to proceed with search method.
	 */
	if (dwarf_info.name_debuginfo)
		return TRUE;

	if ((dwfl = dwfl_begin(&callbacks)) == NULL) {
		ERRMSG("Can't create a handle for a new dwfl session.\n");
		return FALSE;
	}

	/* Search for module debuginfo file. */
	if (dwfl_linux_kernel_report_offline(dwfl,
					     os_release,
					     &dwfl_report_module_p)) {
		ERRMSG("Can't get Module debuginfo for module '%s'\n",
		       dwarf_info.module_name);
		dwfl_end(dwfl);
		return FALSE;
	}
	dwfl_report_end(dwfl, NULL, NULL);
	/* process_module() records the resolved debuginfo path */
	dwfl_getmodules(dwfl, &process_module, NULL, 0);

	dwfl_end(dwfl);
	clean_dwfl_info();

	/* Return success if module debuginfo is found. */
	if (dwarf_info.name_debuginfo)
		return TRUE;

	return FALSE;
}

/*
 * .find_debuginfo callback that always fails; see the comment in
 * init_dwarf_info() for why the fallback search is deliberately disabled.
 */
static int
dwarf_no_debuginfo_found(Dwfl_Module *mod, void **userdata,
			 const char *modname, Dwarf_Addr base,
			 const char *file_name,
			 const char *debuglink_file, GElf_Word debuglink_crc,
			 char **debuginfo_file_name)
{
	return -1;
}

/*
 * Initialize the dwarf info.
 * Linux kernel module debuginfo are of ET_REL (relocatable) type.
 * This function uses dwfl API's to apply relocation before reading the
 * dwarf information from module debuginfo.
 * On success, this function sets the dwarf_info.elfd and dwarf_info.dwarfd
 * after applying relocation to module debuginfo.
 */
static int
init_dwarf_info(void)
{
	Dwfl *dwfl = NULL;
	int dwfl_fd = -1;
	static const Dwfl_Callbacks callbacks = {
		.section_address = dwfl_offline_section_address,
		/*
		 * By the time init_dwarf_info() function is called, we already
		 * know absolute path of debuginfo either resolved through
		 * search_module_debuginfo() call OR user specified vmlinux
		 * debuginfo through '-x' option. In which case .find_debuginfo
		 * callback is never invoked.
		 * But we can not deny a situation where user may pass invalid
		 * file name through '-x' option, where .find_debuginfo gets
		 * invoked to find a valid vmlinux debuginfo and hence we run
		 * into seg fault issue. Hence, set .find_debuginfo to a
		 * funtion pointer that returns -1 to avoid seg fault and let
		 * the makedumpfile throw error messages against the invalid
		 * vmlinux file input.
		 */
		.find_debuginfo = dwarf_no_debuginfo_found
	};

	dwarf_info.elfd = NULL;
	dwarf_info.dwarfd = NULL;

	/*
	 * We already know the absolute path of debuginfo file. Fail if we
	 * still don't have one. Ideally we should never be in this situation.
	 */
	if (!dwarf_info.name_debuginfo) {
		ERRMSG("Can't find absolute path to debuginfo file.\n");
		return FALSE;
	}

	if ((dwfl = dwfl_begin(&callbacks)) == NULL) {
		ERRMSG("Can't create a handle for a new dwfl session.\n");
		return FALSE;
	}

	/* Open the debuginfo file if it is not already open. */
	if (dwarf_info.fd_debuginfo < 0)
		dwarf_info.fd_debuginfo =
			open(dwarf_info.name_debuginfo, O_RDONLY);

	/* dup() so dwfl can own (and close) its copy of the descriptor */
	dwfl_fd = dup(dwarf_info.fd_debuginfo);
	if (dwfl_fd < 0) {
		ERRMSG("Failed to get a duplicate handle for"
			" debuginfo.\n");
		goto err_out;
	}
	/* Apply relocations. */
	if (dwfl_report_offline(dwfl, dwarf_info.module_name,
			dwarf_info.name_debuginfo, dwfl_fd) == NULL) {
		ERRMSG("Failed reading %s: %s\n",
			dwarf_info.name_debuginfo, dwfl_errmsg (-1));
		/* dwfl_fd is consumed on success, not on failure */
		close(dwfl_fd);
		goto err_out;
	}
	dwfl_report_end(dwfl, NULL, NULL);

	/* process_module() fills dwarf_info.elfd/dwarfd */
	dwfl_getmodules(dwfl, &process_module, NULL, 0);

	if (dwarf_info.elfd == NULL) {
		ERRMSG("Can't get first elf header of %s.\n",
		    dwarf_info.name_debuginfo);
		goto err_out;
	}

	if (dwarf_info.dwarfd == NULL) {
		ERRMSG("Can't get debug context descriptor for %s.\n",
		    dwarf_info.name_debuginfo);
		goto err_out;
	}
	dwarf_info.dwfl = dwfl;
	return TRUE;
err_out:
	if (dwfl)
		dwfl_end(dwfl);

	return FALSE;
}

/*
 * Read a member's byte offset within its structure from the
 * DW_AT_data_member_location attribute of its DIE.
 * Returns TRUE and stores the offset into *offset on success.
 */
static int
get_data_member_location(Dwarf_Die *die, long *offset)
{
	size_t expcnt;
	Dwarf_Attribute attr;
	Dwarf_Op *expr;

	if (dwarf_attr(die, DW_AT_data_member_location, &attr) == NULL)
		return FALSE;

	if (dwarf_getlocation(&attr, &expr, &expcnt) < 0)
		return FALSE;

	/* assumes a simple constant location expression (first operand) */
	(*offset) = expr[0].number;

	return TRUE;
}

/*
 * Resolve the DIE referenced by this DIE's DW_AT_type attribute.
 * Returns TRUE and fills *die_type on success.
 */
static int
get_die_type(Dwarf_Die *die, Dwarf_Die *die_type)
{
	Dwarf_Attribute attr;

	if (dwarf_attr(die, DW_AT_type, &attr) == NULL)
		return FALSE;

	if (dwarf_formref_die(&attr, die_type) < 0) {
		ERRMSG("Can't get CU die.\n");
		return FALSE;
	}
	return TRUE;
}
/*
 * If die's type is an array (possibly behind a const qualifier), store
 * its element count (upper bound + 1) into dwarf_info.array_length.
 * Returns TRUE also when the type simply is not an array; FALSE only
 * on lookup failures.
 */
static int
get_data_array_length(Dwarf_Die *die)
{
	int tag;
	Dwarf_Attribute attr;
	Dwarf_Die die_type;
	Dwarf_Word upper_bound;

	if (!get_die_type(die, &die_type)) {
		ERRMSG("Can't get CU die of DW_AT_type.\n");
		return FALSE;
	}
	tag = dwarf_tag(&die_type);
	if (tag == DW_TAG_const_type) {
		/* This array is of const type. Get the die type again */
		if (!get_die_type(&die_type, &die_type)) {
			ERRMSG("Can't get CU die of DW_AT_type.\n");
			return FALSE;
		}
		tag = dwarf_tag(&die_type);
	}
	if (tag != DW_TAG_array_type) {
		/*
		 * This kernel doesn't have the member of array.
		 */
		return TRUE;
	}

	/*
	 * Get the demanded array length.
	 */
	dwarf_child(&die_type, &die_type);
	do {
		tag = dwarf_tag(&die_type);
		if (tag == DW_TAG_subrange_type)
			break;
	} while (dwarf_siblingof(&die_type, &die_type));

	if (tag != DW_TAG_subrange_type)
		return FALSE;

	if (dwarf_attr(&die_type, DW_AT_upper_bound, &attr) == NULL)
		return FALSE;

	if (dwarf_formudata(&attr, &upper_bound) < 0)
		return FALSE;

	/* NOTE(review): Dwarf_Word is an unsigned type, so this check
	 * can never be true -- presumably dead code, confirm upstream */
	if (upper_bound < 0)
		return FALSE;

	dwarf_info.array_length = upper_bound + 1;

	return TRUE;
}

/*
 * Set dwarf_info.array_length to FOUND_ARRAY_TYPE when die's type is
 * an array; used by DWARF_INFO_CHECK_SYMBOL_ARRAY_TYPE.
 */
static int
check_array_type(Dwarf_Die *die)
{
	int tag;
	Dwarf_Die die_type;

	if (!get_die_type(die, &die_type)) {
		ERRMSG("Can't get CU die of DW_AT_type.\n");
		return FALSE;
	}
	tag = dwarf_tag(&die_type);
	if (tag == DW_TAG_array_type)
		dwarf_info.array_length = FOUND_ARRAY_TYPE;

	return TRUE;
}

/*
 * Follow the DW_AT_type chain to the underlying type, accumulating
 * TYPE_* flags (array/pointer/struct/base) in dwarf_info.type_flag
 * along the way.  Stores the final type's name (or "void" for a bare
 * pointer) in dwarf_info.type_name and its size in struct_size.
 * Note: *die is advanced in place to the final type DIE.
 */
static int
get_dwarf_base_type(Dwarf_Die *die)
{
	int tag;
	const char *name;

	while (get_die_type(die, die)) {
		tag = dwarf_tag(die);
		switch (tag) {
		case DW_TAG_array_type:
			dwarf_info.type_flag |= TYPE_ARRAY;
			break;
		case DW_TAG_pointer_type:
			dwarf_info.type_flag |= TYPE_PTR;
			break;
		case DW_TAG_structure_type:
			dwarf_info.type_flag |= TYPE_STRUCT;
			break;
		case DW_TAG_base_type:
			dwarf_info.type_flag |= TYPE_BASE;
			break;
		}
	}

	name = dwarf_diename(die);
	if (name)
		dwarf_info.type_name = strdup(name);
	else if (dwarf_info.type_flag == TYPE_PTR)
		/* a plain, untyped pointer: report it as "void" */
		dwarf_info.type_name = strdup("void");

	dwarf_info.struct_size = dwarf_bytesize(die);

	return TRUE;
}

/*
 * Get the die, given the offset
 */
static int
get_die_from_offset(Dwarf_Off offset, Dwarf_Die *die)
{
	if (!init_dwarf_info())
		return FALSE;

	if ((!offset) || (!die))
		return FALSE;

	if (!dwarf_offdie(dwarf_info.dwarfd, offset, die)) {
		return FALSE;
	}

	return TRUE;
}

/*
 * Function for searching struct page.union.struct.mapping.
 *
 * A DIE is a "container" we may descend into when looking for a
 * member: always a struct, and also a union except for the
 * first-union-offset query (which must stop at the union itself).
 */
static int
is_container(Dwarf_Die *die)
{
	if (dwarf_tag(die) == DW_TAG_structure_type)
		return TRUE;
	if (dwarf_info.cmd != DWARF_INFO_GET_MEMBER_OFFSET_1ST_UNION
	    && dwarf_tag(die) == DW_TAG_union_type)
		return TRUE;
	return FALSE;
}

/*
 * After finding a member inside a nested container, add the container
 * member's own offset so member_offset is relative to the outer struct.
 */
static void
adjust_member_offset(Dwarf_Die *die)
{
	long offset;

	if (dwarf_info.member_offset == NOT_FOUND_STRUCTURE)
		return;
	if (!get_data_member_location(die, &offset))
		return;
	dwarf_info.member_offset += offset;
}

/*
 * Walk the members of a struct/union DIE looking for
 * dwarf_info.member_name and satisfy the current member query
 * (type, offset, first-union offset, or array length).  Recurses
 * into anonymous/nested containers.  Returns TRUE when satisfied.
 */
static int
search_member(Dwarf_Die *die)
{
	int tag;
	long offset;
	const char *name;
	Dwarf_Die child, *walker, die_type;

	if (dwarf_child(die, &child) != 0)
		return FALSE;

	walker = &child;

	do {
		tag  = dwarf_tag(walker);
		name = dwarf_diename(walker);

		if (tag != DW_TAG_member)
			continue;

		/*
		 * Descend into structures/unions and search for member
		 * there.
		 */
		if ((!name) || (strcmp(name, dwarf_info.member_name) != 0)) {
			if (!get_die_type(walker, &die_type))
				continue;
			if (is_container(&die_type))
				if (search_member(&die_type)) {
					adjust_member_offset(walker);
					return TRUE;
				}
		}

		switch (dwarf_info.cmd) {
		case DWARF_INFO_GET_MEMBER_TYPE:
			if ((!name) || strcmp(name, dwarf_info.member_name))
				continue;
			/*
			 * Get the member offset.
			 */
			if (!get_dwarf_base_type(walker))
				continue;
			return TRUE;
		case DWARF_INFO_GET_MEMBER_OFFSET:
			if ((!name) || strcmp(name, dwarf_info.member_name))
				continue;
			/*
			 * Get the member offset.
			 */
			if (dwarf_tag(die) == DW_TAG_union_type)
				offset = 0;	/* union members all start at 0 */
			else if (!get_data_member_location(walker, &offset))
				continue;
			dwarf_info.member_offset = offset;
			return TRUE;
		case DWARF_INFO_GET_MEMBER_OFFSET_1ST_UNION:
			if (!get_die_type(walker, &die_type))
				continue;
			if (dwarf_tag(&die_type) != DW_TAG_union_type)
				continue;
			/*
			 * Get the member offset.
			 */
			if (!get_data_member_location(walker, &offset))
				continue;
			dwarf_info.member_offset = offset;
			return TRUE;
		case DWARF_INFO_GET_MEMBER_ARRAY_LENGTH:
			if ((!name) || strcmp(name, dwarf_info.member_name))
				continue;
			/*
			 * Get the member length.
			 */
			if (!get_data_array_length(walker))
				continue;
			return TRUE;
		}
	} while (!dwarf_siblingof(walker, walker));

	/*
	 * Return even if not found.
	 */
	return FALSE;
}

/*
 * Scan sibling DIEs for the structure named dwarf_info.struct_name
 * (with a positive DW_AT_byte_size); on a match, set *found, record
 * struct_size, and run the member sub-search if the command needs it.
 */
static void
search_structure(Dwarf_Die *die, int *found)
{
	int tag;
	const char *name;

	/*
	 * If we get to here then we don't have any more
	 * children, check to see if this is a relevant tag
	 */
	do {
		tag  = dwarf_tag(die);
		name = dwarf_diename(die);
		if ((tag != DW_TAG_structure_type) || (!name)
		    || strcmp(name, dwarf_info.struct_name))
			continue;
		/*
		 * Skip if DW_AT_byte_size is not included.
		 */
		dwarf_info.struct_size = dwarf_bytesize(die);

		if (dwarf_info.struct_size > 0)
			break;

	} while (!dwarf_siblingof(die, die));

	if (dwarf_info.struct_size <= 0) {
		/*
		 * Not found the demanded structure.
		 */
		return;
	}

	/*
	 * Found the demanded structure.
	 */
	*found = TRUE;
	switch (dwarf_info.cmd) {
	case DWARF_INFO_GET_STRUCT_SIZE:
		break;
	case DWARF_INFO_GET_MEMBER_TYPE:
	case DWARF_INFO_GET_MEMBER_OFFSET:
	case DWARF_INFO_GET_MEMBER_OFFSET_1ST_UNION:
	case DWARF_INFO_GET_MEMBER_ARRAY_LENGTH:
		search_member(die);
		break;
	}
}
/*
 * Handle the enum queries: either the byte size of a named enumeration
 * type (DWARF_INFO_GET_ENUMERATION_TYPE_SIZE) or the constant value of
 * a named enumerator (DW_AT_const_value) stored into enum_number.
 */
static void
search_number(Dwarf_Die *die, int *found)
{
	int tag, bytesize;
	Dwarf_Word const_value;
	Dwarf_Attribute attr;
	Dwarf_Die child, *walker;
	const char *name;

	do {
		tag  = dwarf_tag(die);
		if (tag != DW_TAG_enumeration_type)
			continue;

		if (dwarf_info.cmd == DWARF_INFO_GET_ENUMERATION_TYPE_SIZE) {
			name = dwarf_diename(die);

			if (!name || strcmp(name, dwarf_info.struct_name))
				continue;

			if ((bytesize = dwarf_bytesize(die)) <= 0)
				continue;

			*found = TRUE;

			dwarf_info.struct_size = bytesize;

			return;
		}

		/* enumerator query: walk this enum's children */
		if (dwarf_child(die, &child) != 0)
			continue;

		walker = &child;

		do {
			tag  = dwarf_tag(walker);
			name = dwarf_diename(walker);

			if ((tag != DW_TAG_enumerator) || (!name)
			    || strcmp(name, dwarf_info.enum_name))
				continue;

			if (!dwarf_attr(walker, DW_AT_const_value, &attr))
				continue;

			if (dwarf_formudata(&attr, &const_value) < 0)
				continue;

			*found = TRUE;
			dwarf_info.enum_number = (long)const_value;

		} while (!dwarf_siblingof(walker, walker));

	} while (!dwarf_siblingof(die, die));
}

/*
 * Handle the typedef queries: the underlying type's size
 * (DWARF_INFO_GET_TYPEDEF_SIZE) or the declaring source file name
 * (DWARF_INFO_GET_TYPEDEF_SRCNAME) for the typedef named struct_name.
 */
static void
search_typedef(Dwarf_Die *die, int *found)
{
	int tag = 0;
	char *src_name = NULL;
	const char *name;
	Dwarf_Die die_type;

	/*
	 * If we get to here then we don't have any more
	 * children, check to see if this is a relevant tag
	 */
	do {
		tag  = dwarf_tag(die);
		name = dwarf_diename(die);

		if ((tag != DW_TAG_typedef) || (!name)
		    || strcmp(name, dwarf_info.struct_name))
			continue;

		if (dwarf_info.cmd == DWARF_INFO_GET_TYPEDEF_SIZE) {
			if (!get_die_type(die, &die_type)) {
				ERRMSG("Can't get CU die of DW_AT_type.\n");
				break;
			}
			dwarf_info.struct_size = dwarf_bytesize(&die_type);
			if (dwarf_info.struct_size <= 0)
				continue;

			*found = TRUE;
			break;
		} else if (dwarf_info.cmd == DWARF_INFO_GET_TYPEDEF_SRCNAME) {
			src_name = (char *)dwarf_decl_file(die);
			if (!src_name)
				continue;

			*found = TRUE;
			/* NOTE(review): strncpy does not guarantee NUL
			 * termination when src_name is LEN_SRCFILE or
			 * longer -- confirm LEN_SRCFILE is generous */
			strncpy(dwarf_info.src_name, src_name, LEN_SRCFILE);
			break;
		}
	} while (!dwarf_siblingof(die, die));
}

/*
 * Find the variable DIE named dwarf_info.symbol_name among the
 * siblings, then satisfy the symbol query (array length, array-type
 * check, or base type).
 */
static void
search_symbol(Dwarf_Die *die, int *found)
{
	int tag;
	const char *name;

	/*
	 * If we get to here then we don't have any more
	 * children, check to see if this is a relevant tag
	 */
	do {
		tag  = dwarf_tag(die);
		name = dwarf_diename(die);

		if ((tag == DW_TAG_variable) && (name)
		    && !strcmp(name, dwarf_info.symbol_name))
			break;

	} while (!dwarf_siblingof(die, die));

	if ((tag != DW_TAG_variable) || (!name)
	    || strcmp(name, dwarf_info.symbol_name)) {
		/*
		 * Not found the demanded symbol.
		 */
		return;
	}

	/*
	 * Found the demanded symbol.
	 */
	*found = TRUE;
	switch (dwarf_info.cmd) {
	case DWARF_INFO_GET_SYMBOL_ARRAY_LENGTH:
		get_data_array_length(die);
		break;
	case DWARF_INFO_CHECK_SYMBOL_ARRAY_TYPE:
		check_array_type(die);
		break;
	case DWARF_INFO_GET_SYMBOL_TYPE:
		get_dwarf_base_type(die);
		break;
	}
}

/*
 * Find the type-domain DIE named dwarf_info.symbol_name whose tag
 * matches the requested domain (struct/union/typedef so far), record
 * its size and DIE offset.  Recurses through nested containers.
 */
static void
search_domain(Dwarf_Die *die, int *found)
{
	int tag;
	const char *name;
	short flag = 0;
	Dwarf_Die die_type;

	do {
		tag  = dwarf_tag(die);
		name = dwarf_diename(die);

		/*
		 * Descend into members and search for the
		 * needed domain there.
		 */
		if ((!name) || strcmp(name, dwarf_info.symbol_name)) {
			if (!get_die_type(die, &die_type))
				continue;

			if (is_container(&die_type)) {
				Dwarf_Die child;

				if (dwarf_child(&die_type, &child) != 0)
					continue;

				search_domain(&child, found);

				if (*found)
					return;
			}
		}

		if ((!name) || strcmp(name, dwarf_info.symbol_name))
			continue;

		switch (dwarf_info.cmd) {
		case DWARF_INFO_GET_DOMAIN_STRUCT:
			if (tag == DW_TAG_structure_type)
				flag = 1;
			break;
		case DWARF_INFO_GET_DOMAIN_UNION:
			if (tag == DW_TAG_union_type)
				flag = 1;
			break;
		case DWARF_INFO_GET_DOMAIN_TYPEDEF:
			if (tag == DW_TAG_typedef)
				flag = 1;
			break;
		/* TODO
		 * Implement functionality for the rest of the domains
		 */
		}

		if (!flag)
			continue;

		dwarf_info.struct_size = dwarf_bytesize(die);

		if (dwarf_info.struct_size > 0) {
			if (found)
				*found = TRUE;
			dwarf_info.die_offset = dwarf_dieoffset(die);
			break;
		}
	} while (!dwarf_siblingof(die, die));
}

/*
 * Find any sibling DIE named dwarf_info.symbol_name and record its
 * offset in dwarf_info.die_offset (DWARF_INFO_GET_DIE).
 */
static void
search_die(Dwarf_Die *die, int *found)
{
	const char *name;

	do {
		name = dwarf_diename(die);

		if ((!name) || strcmp(name, dwarf_info.symbol_name))
			continue;

		if (found)
			*found = TRUE;

		dwarf_info.die_offset = dwarf_dieoffset(die);
		return;
	} while (!dwarf_siblingof(die, die));
}

/*
 * Depth-first walk of a DIE tree: recurse into children first, then
 * dispatch this sibling chain to the search routine selected by
 * dwarf_info.cmd.  Stops as soon as *found is set.
 */
static void
search_die_tree(Dwarf_Die *die, int *found)
{
	Dwarf_Die child;

	/*
	 * start by looking at the children
	 */
	if (dwarf_child(die, &child) == 0)
		search_die_tree(&child, found);

	if (*found)
		return;

	if (is_search_structure(dwarf_info.cmd))
		search_structure(die, found);

	else if (is_search_number(dwarf_info.cmd))
		search_number(die, found);

	else if (is_search_symbol(dwarf_info.cmd))
		search_symbol(die, found);

	else if (is_search_typedef(dwarf_info.cmd))
		search_typedef(die, found);

	else if (is_search_domain(dwarf_info.cmd))
		search_domain(die, found);

	else if (is_search_die(dwarf_info.cmd))
		search_die(die, found);
}
int +get_debug_info(void) +{ + int found = FALSE; + char *name = NULL; + size_t shstrndx, header_size; + uint8_t address_size, offset_size; + Dwarf *dwarfd = NULL; + Elf *elfd = NULL; + Dwarf_Off off = 0, next_off = 0, abbrev_offset = 0; + Elf_Scn *scn = NULL; + GElf_Shdr scnhdr_mem, *scnhdr = NULL; + Dwarf_Die cu_die; + + int ret = FALSE; + + if (!init_dwarf_info()) + return FALSE; + + elfd = dwarf_info.elfd; + dwarfd = dwarf_info.dwarfd; + + if (elf_getshdrstrndx(elfd, &shstrndx) < 0) { + ERRMSG("Can't get the section index of the string table.\n"); + goto out; + } + + /* + * Search for ".debug_info" section. + */ + while ((scn = elf_nextscn(elfd, scn)) != NULL) { + scnhdr = gelf_getshdr(scn, &scnhdr_mem); + name = elf_strptr(elfd, shstrndx, scnhdr->sh_name); + if (!strcmp(name, ".debug_info")) + break; + } + if (strcmp(name, ".debug_info")) { + ERRMSG("Can't get .debug_info section.\n"); + goto out; + } + + /* + * Search by each CompileUnit. + */ + while (dwarf_nextcu(dwarfd, off, &next_off, &header_size, + &abbrev_offset, &address_size, &offset_size) == 0) { + off += header_size; + if (dwarf_offdie(dwarfd, off, &cu_die) == NULL) { + ERRMSG("Can't get CU die.\n"); + goto out; + } + search_die_tree(&cu_die, &found); + if (found) + break; + off = next_off; + } + ret = TRUE; +out: + clean_dwfl_info(); + + return ret; +} + + +/* + * External functions. 
+ */ +char * +get_dwarf_module_name(void) +{ + return dwarf_info.module_name; +} + +void +get_fileinfo_of_debuginfo(int *fd, char **name) +{ + *fd = dwarf_info.fd_debuginfo; + *name = dwarf_info.name_debuginfo; +} + +unsigned long long +get_symbol_addr(char *symname) +{ + int i; + unsigned long long symbol = NOT_FOUND_SYMBOL; + Elf *elfd = NULL; + GElf_Shdr shdr; + GElf_Sym sym; + Elf_Data *data = NULL; + Elf_Scn *scn = NULL; + char *sym_name = NULL; + + if (!init_dwarf_info()) + return NOT_FOUND_SYMBOL; + + elfd = dwarf_info.elfd; + + while ((scn = elf_nextscn(elfd, scn)) != NULL) { + if (gelf_getshdr(scn, &shdr) == NULL) { + ERRMSG("Can't get section header.\n"); + goto out; + } + if (shdr.sh_type == SHT_SYMTAB) + break; + } + if (!scn) { + ERRMSG("Can't find symbol table.\n"); + goto out; + } + + data = elf_getdata(scn, data); + + if ((!data) || (data->d_size == 0)) { + ERRMSG("No data in symbol table.\n"); + goto out; + } + + for (i = 0; i < (shdr.sh_size/shdr.sh_entsize); i++) { + if (gelf_getsym(data, i, &sym) == NULL) { + ERRMSG("Can't get symbol at index %d.\n", i); + goto out; + } + sym_name = elf_strptr(elfd, shdr.sh_link, sym.st_name); + + if (sym_name == NULL) + continue; + + if (!strcmp(sym_name, symname)) { + symbol = sym.st_value; + break; + } + } +out: + clean_dwfl_info(); + + return symbol; +} + +unsigned long +get_next_symbol_addr(char *symname) +{ + int i; + unsigned long symbol = NOT_FOUND_SYMBOL; + unsigned long next_symbol = NOT_FOUND_SYMBOL; + Elf *elfd = NULL; + GElf_Shdr shdr; + GElf_Sym sym; + Elf_Data *data = NULL; + Elf_Scn *scn = NULL; + char *sym_name = NULL; + + if (!init_dwarf_info()) + return NOT_FOUND_SYMBOL; + + elfd = dwarf_info.elfd; + + while ((scn = elf_nextscn(elfd, scn)) != NULL) { + if (gelf_getshdr(scn, &shdr) == NULL) { + ERRMSG("Can't get section header.\n"); + goto out; + } + if (shdr.sh_type == SHT_SYMTAB) + break; + } + if (!scn) { + ERRMSG("Can't find symbol table.\n"); + goto out; + } + + data = elf_getdata(scn, 
data); + + if ((!data) || (data->d_size == 0)) { + ERRMSG("No data in symbol table.\n"); + goto out; + } + + for (i = 0; i < (shdr.sh_size/shdr.sh_entsize); i++) { + if (gelf_getsym(data, i, &sym) == NULL) { + ERRMSG("Can't get symbol at index %d.\n", i); + goto out; + } + sym_name = elf_strptr(elfd, shdr.sh_link, sym.st_name); + + if (sym_name == NULL) + continue; + + if (!strcmp(sym_name, symname)) { + symbol = sym.st_value; + break; + } + } + + if (symbol == NOT_FOUND_SYMBOL) + goto out; + + /* + * Search for next symbol. + */ + for (i = 0; i < (shdr.sh_size/shdr.sh_entsize); i++) { + if (gelf_getsym(data, i, &sym) == NULL) { + ERRMSG("Can't get symbol at index %d.\n", i); + goto out; + } + sym_name = elf_strptr(elfd, shdr.sh_link, sym.st_name); + + if (sym_name == NULL) + continue; + + if (symbol < sym.st_value) { + if (next_symbol == NOT_FOUND_SYMBOL) + next_symbol = sym.st_value; + + else if (sym.st_value < next_symbol) + next_symbol = sym.st_value; + } + } +out: + clean_dwfl_info(); + + return next_symbol; +} + +/* + * Get the size of structure. + */ +long +get_structure_size(char *structname, int flag_typedef) +{ + if (flag_typedef) + dwarf_info.cmd = DWARF_INFO_GET_TYPEDEF_SIZE; + else + dwarf_info.cmd = DWARF_INFO_GET_STRUCT_SIZE; + + dwarf_info.struct_name = structname; + dwarf_info.struct_size = NOT_FOUND_STRUCTURE; + + if (!get_debug_info()) + return FAILED_DWARFINFO; + + return dwarf_info.struct_size; +} + +/* + * Get the size of pointer. + */ +long +get_pointer_size(void) +{ + return sizeof(void *); +} + +/* + * Get the type of given symbol. 
+ */ +char * +get_symbol_type_name(char *symname, int cmd, long *size, + unsigned long *flag) +{ + dwarf_info.cmd = cmd; + dwarf_info.symbol_name = symname; + dwarf_info.type_name = NULL; + dwarf_info.struct_size = NOT_FOUND_STRUCTURE; + dwarf_info.type_flag = 0; + + if (!get_debug_info()) + return NULL; + + if (size) + *size = dwarf_info.struct_size; + + if (flag) + *flag = dwarf_info.type_flag; + + return dwarf_info.type_name; +} + +/* + * Get the offset of member. + */ +long +get_member_offset(char *structname, char *membername, int cmd) +{ + dwarf_info.cmd = cmd; + dwarf_info.struct_name = structname; + dwarf_info.struct_size = NOT_FOUND_STRUCTURE; + dwarf_info.member_offset = NOT_FOUND_STRUCTURE; + + /* + * When searching a offset of 1st union, member_name is unnecessary. + */ + if (dwarf_info.cmd == DWARF_INFO_GET_MEMBER_OFFSET_1ST_UNION) + dwarf_info.member_name = ""; + else + dwarf_info.member_name = membername; + + if (!get_debug_info()) + return FAILED_DWARFINFO; + + return dwarf_info.member_offset; +} + +/* + * Get the type name and size of member. + */ +char * +get_member_type_name(char *structname, char *membername, int cmd, long *size, + unsigned long *flag) +{ + dwarf_info.cmd = cmd; + dwarf_info.struct_name = structname; + dwarf_info.struct_size = NOT_FOUND_STRUCTURE; + dwarf_info.member_name = membername; + dwarf_info.type_name = NULL; + dwarf_info.type_flag = 0; + + if (!get_debug_info()) + return NULL; + + if (dwarf_info.struct_size == NOT_FOUND_STRUCTURE) + return NULL; + + if (size) + *size = dwarf_info.struct_size; + + if (flag) + *flag = dwarf_info.type_flag; + + return dwarf_info.type_name; +} + +/* + * Get the length of array. 
+ */ +long +get_array_length(char *name01, char *name02, unsigned int cmd) +{ + switch (cmd) { + case DWARF_INFO_GET_SYMBOL_ARRAY_LENGTH: + dwarf_info.symbol_name = name01; + break; + case DWARF_INFO_CHECK_SYMBOL_ARRAY_TYPE: + dwarf_info.symbol_name = name01; + break; + case DWARF_INFO_GET_MEMBER_ARRAY_LENGTH: + dwarf_info.struct_name = name01; + dwarf_info.member_name = name02; + break; + } + dwarf_info.cmd = cmd; + dwarf_info.struct_size = NOT_FOUND_STRUCTURE; + dwarf_info.member_offset = NOT_FOUND_STRUCTURE; + dwarf_info.array_length = NOT_FOUND_STRUCTURE; + + if (!get_debug_info()) + return FAILED_DWARFINFO; + + return dwarf_info.array_length; +} + +long +get_enum_number(char *enum_name) +{ + dwarf_info.cmd = DWARF_INFO_GET_ENUM_NUMBER; + dwarf_info.enum_name = enum_name; + dwarf_info.enum_number = NOT_FOUND_NUMBER; + + if (!get_debug_info()) + return FAILED_DWARFINFO; + + return dwarf_info.enum_number; +} + +/* + * Get the source filename. + */ +int +get_source_filename(char *structname, char *src_name, int cmd) +{ + dwarf_info.cmd = cmd; + dwarf_info.struct_name = structname; + + if (!get_debug_info()) + return FALSE; + + strncpy(src_name, dwarf_info.src_name, LEN_SRCFILE); + + return TRUE; +} + +/* + * Get the domain information of the symbol + */ +long +get_domain(char *symname, int cmd, unsigned long long *die) +{ + dwarf_info.cmd = cmd; + dwarf_info.symbol_name = symname; + dwarf_info.type_name = NULL; + dwarf_info.struct_size = NOT_FOUND_STRUCTURE; + dwarf_info.die_offset = 0; + + if (!get_debug_info()) + return 0; + + if (die) + *die = (unsigned long long) dwarf_info.die_offset; + + return dwarf_info.struct_size; +} + +/* + * Get the number of fields in a structure or union provided the + * die offset of the structure or union + */ +int +get_die_nfields(unsigned long long die_off) +{ + int tag, nfields = 0; + Dwarf_Die result, child, *die; + + if (!get_die_from_offset((Dwarf_Off) die_off, &result)) { + return -1; + } + + die = &result; + tag = 
dwarf_tag(die); + if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) { + clean_dwfl_info(); + return -1; + } + + if (dwarf_child(die, &child) != 0) { + clean_dwfl_info(); + return -1; + } + + /* Find the number of fields in the structure */ + die = &child; + do { + tag = dwarf_tag(die); + if (tag == DW_TAG_member) + nfields++; + else + continue; + } while (!dwarf_siblingof(die, die)); + + clean_dwfl_info(); + return nfields; +} + +/* + * Get the information of the structure member given by index + */ +int +get_die_member(unsigned long long die_off, int index, long *offset, + char **name, int *nbits, int *fbits, unsigned long long *m_die) +{ + const char *diename; + int tag, size, nfields = 0; + Dwarf_Die result, child, die_base, *die; + + if (!offset || !nbits || !fbits || !name || !m_die) + return -1; + + if (!get_die_from_offset((Dwarf_Off) die_off, &result)) { + return -1; + } + + die = &result; + tag = dwarf_tag(die); + if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) { + clean_dwfl_info(); + return -1; + } + + if (dwarf_child(die, &child) != 0) { + clean_dwfl_info(); + return -1; + } + + /* Find the correct field in the structure */ + die = &child; + do { + tag = dwarf_tag(die); + if (tag == DW_TAG_member) { + if (nfields == index) + break; + else + nfields++; + } + } while (!dwarf_siblingof(die, die)); + + if (nfields != index) { + ERRMSG("No member found at index %d.\n", index); + clean_dwfl_info(); + return -1; + } + + /* Fill in the required info for the member */ + if (!get_data_member_location(die, offset)) + *offset = 0; + + diename = dwarf_diename(die); + /* + * Duplicate the string before we pass it to eppic layer. The + * original string returned by dwarf layer will become invalid + * after clean_dwfl_info() call. 
+ */ + if (diename) + *name = strdup(diename); + *m_die = dwarf_dieoffset(die); + + get_die_type(die, &die_base); + if (dwarf_tag(&die_base) == DW_TAG_array_type) { + dwarf_info.array_length = 0; + get_data_array_length(die); + size = dwarf_info.array_length; + } else { + size = dwarf_bytesize(&die_base); + } + + /* TODO + * Correctly update fbits and nbits + */ + *nbits = *fbits = 0; + + clean_dwfl_info(); + if (size < 0) + return 0; + else + return size; +} + +/* + * Get the die attribute type + */ +int +get_die_attr_type(unsigned long long die_off, int *type_flag, + unsigned long long *die_attr_off) +{ + Dwarf_Die result; + + if (!die_attr_off || !die_off) + return FALSE; + + if (!get_die_from_offset((Dwarf_Off) die_off, &result)) { + return FALSE; + } + + if (!get_die_type(&result, &result)) { + clean_dwfl_info(); + return FALSE; + } + + *die_attr_off = dwarf_dieoffset(&result); + *type_flag = dwarf_tag(&result); + clean_dwfl_info(); + return TRUE; +} + +/* + * Get name attribute given the die offset This function is called by eppic + * layer directly as one of the callback functions. + * + * This function returns a pointer to newly allocated string which is a + * duplicate of original string returned from dwarf APIs. The reason for doing + * this is because the original string returned by dwarf layer will become + * invalid (freed) as soon as we close the dwarf handle through + * clean_dwfl_info(). This avoids the segfault when caller (eppic layer) of + * this function tries to access the string pointer. + * + * NOTE: It is callers responsibility to free the memory of new string. 
+ */ +char * +get_die_name(unsigned long long die_off) +{ + Dwarf_Die result; + char *name = NULL; + const char *diename; + + if (!die_off) + return NULL; + + if (!get_die_from_offset((Dwarf_Off) die_off, &result)) { + return NULL; + } + + diename = dwarf_diename(&result); + if (diename) + name = strdup(diename); + clean_dwfl_info(); + return name; +} + +/* + * Get the die offset given the die name + */ +unsigned long long +get_die_offset(char *sysname) +{ + dwarf_info.cmd = DWARF_INFO_GET_DIE; + dwarf_info.symbol_name = sysname; + dwarf_info.type_name = NULL; + dwarf_info.struct_size = NOT_FOUND_STRUCTURE; + dwarf_info.die_offset = 0; + + if (!sysname) + return 0; + + if (!get_debug_info()) + return 0; + + return (unsigned long long)dwarf_info.die_offset; +} + +/* + * Get length attribute given the die offset + */ +int +get_die_length(unsigned long long die_off, int flag) +{ + Dwarf_Die result, die_base; + int size = 0; + + if (!die_off) + return FALSE; + + if (!get_die_from_offset((Dwarf_Off) die_off, &result)) { + return FALSE; + } + + if (flag) { + size = dwarf_bytesize(&result); + goto out; + } + + get_die_type(&result, &die_base); + if (dwarf_tag(&die_base) == DW_TAG_array_type) { + dwarf_info.array_length = 0; + get_data_array_length(&result); + size = dwarf_info.array_length; + } else { + size = dwarf_bytesize(&die_base); + } +out: + clean_dwfl_info(); + return size; +} + +/* + * Set the dwarf_info with kernel/module debuginfo file information. + */ +int +set_dwarf_debuginfo(char *mod_name, char *os_release, + char *name_debuginfo, int fd_debuginfo) +{ + if (!mod_name) + return FALSE; + if (dwarf_info.module_name && !strcmp(dwarf_info.module_name, mod_name)) + return TRUE; + + /* Switching to different module. + * + * Close the file descriptor if previous module is != kernel and + * xen-syms. The reason is, vmlinux file will always be supplied + * by user and code to open/close kernel debuginfo file already + * in place. 
The module debuginfo files are opened only if '--config' + * option is used. This helps not to break the existing functionlity + * if called without '--config' option. + */ + + if (dwarf_info.module_name + && strcmp(dwarf_info.module_name, "vmlinux") + && strcmp(dwarf_info.module_name, "xen-syms")) { + if (dwarf_info.fd_debuginfo >= 0) + close(dwarf_info.fd_debuginfo); + if (dwarf_info.name_debuginfo) + free(dwarf_info.name_debuginfo); + } + if (dwarf_info.module_name) + free(dwarf_info.module_name); + + dwarf_info.fd_debuginfo = fd_debuginfo; + dwarf_info.name_debuginfo = name_debuginfo; + dwarf_info.module_name = strdup(mod_name); + + if (!strcmp(dwarf_info.module_name, "vmlinux") || + !strcmp(dwarf_info.module_name, "xen-syms")) + return TRUE; + + /* check to see whether module debuginfo is available */ + return search_module_debuginfo(os_release); +} + diff --git a/dwarf_info.h b/dwarf_info.h new file mode 100644 index 0000000..c5128f2 --- /dev/null +++ b/dwarf_info.h @@ -0,0 +1,85 @@ +/* + * dwarf_info.h + * + * Copyright (C) 2011 NEC Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ +#ifndef _DWARF_INFO_H +#define _DWARF_INFO_H + +#define LEN_SRCFILE (100) + +#define NOT_FOUND_LONG_VALUE (-1) +#define FAILED_DWARFINFO (-2) +#define INVALID_STRUCTURE_DATA (-3) +#define FOUND_ARRAY_TYPE (LONG_MAX - 1) + +#define NOT_FOUND_SYMBOL (0) +#define NOT_FOUND_STRUCTURE (NOT_FOUND_LONG_VALUE) +#define NOT_FOUND_NUMBER (NOT_FOUND_LONG_VALUE) + +/* flags for dwarf_info.type_flag */ +#define TYPE_BASE 0x01 +#define TYPE_ARRAY 0x02 +#define TYPE_PTR 0x04 +#define TYPE_STRUCT 0x08 +#define TYPE_LIST_HEAD 0x10 + +enum { + DWARF_INFO_GET_STRUCT_SIZE, + DWARF_INFO_GET_MEMBER_OFFSET, + DWARF_INFO_GET_MEMBER_OFFSET_1ST_UNION, + DWARF_INFO_GET_MEMBER_ARRAY_LENGTH, + DWARF_INFO_GET_SYMBOL_ARRAY_LENGTH, + DWARF_INFO_GET_TYPEDEF_SIZE, + DWARF_INFO_GET_TYPEDEF_SRCNAME, + DWARF_INFO_GET_ENUM_NUMBER, + DWARF_INFO_CHECK_SYMBOL_ARRAY_TYPE, + DWARF_INFO_GET_SYMBOL_TYPE, + DWARF_INFO_GET_MEMBER_TYPE, + DWARF_INFO_GET_ENUMERATION_TYPE_SIZE, + DWARF_INFO_GET_DOMAIN_STRUCT, + DWARF_INFO_GET_DOMAIN_TYPEDEF, + DWARF_INFO_GET_DOMAIN_ARRAY, + DWARF_INFO_GET_DOMAIN_UNION, + DWARF_INFO_GET_DOMAIN_ENUM, + DWARF_INFO_GET_DOMAIN_REF, + DWARF_INFO_GET_DOMAIN_STRING, + DWARF_INFO_GET_DOMAIN_BASE, + DWARF_INFO_GET_DIE, +}; + +char *get_dwarf_module_name(void); +void get_fileinfo_of_debuginfo(int *fd, char **name); +unsigned long long get_symbol_addr(char *symname); +unsigned long get_next_symbol_addr(char *symname); +long get_structure_size(char *structname, int flag_typedef); +long get_pointer_size(void); +char *get_symbol_type_name(char *symname, int cmd, long *size, unsigned long *flag); +long get_member_offset(char *structname, char *membername, int cmd); +char *get_member_type_name(char *structname, char *membername, int cmd, long *size, unsigned long *flag); +long get_array_length(char *name01, char *name02, unsigned int cmd); +long get_enum_number(char *enum_name); +int get_source_filename(char *structname, char *src_name, int cmd); +long get_domain(char *symname, int cmd, 
unsigned long long *die); +int get_die_nfields(unsigned long long die_off); +int get_die_member(unsigned long long die_off, int index, long *offset, + char **name, int *nbits, int *fbits, unsigned long long *m_die); +int get_die_attr_type(unsigned long long die_off, int *type_flag, + unsigned long long *die_attr_off); +char *get_die_name(unsigned long long die_off); +unsigned long long get_die_offset(char *sysname); +int get_die_length(unsigned long long die_off, int flag); +int set_dwarf_debuginfo(char *mod_name, char *os_release, char *name_debuginfo, int fd_debuginfo); + +#endif /* DWARF_INFO_H */ + diff --git a/elf_info.c b/elf_info.c new file mode 100644 index 0000000..711601a --- /dev/null +++ b/elf_info.c @@ -0,0 +1,1268 @@ +/* + * elf_info.c + * + * Copyright (C) 2011 NEC Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> +#include <errno.h> +#include <gelf.h> +#include <libelf.h> + +#include "common.h" +#include "print_info.h" +#include "elf_info.h" +#include "makedumpfile.h" + +#define ELF32 (1) +#define ELF64 (2) + +#define VMCOREINFO_NOTE_NAME "VMCOREINFO" +#define VMCOREINFO_NOTE_NAME_BYTES (sizeof(VMCOREINFO_NOTE_NAME)) +#define VMCOREINFO_XEN_NOTE_NAME "VMCOREINFO_XEN" +#define VMCOREINFO_XEN_NOTE_NAME_BYTES (sizeof(VMCOREINFO_XEN_NOTE_NAME)) + +#define XEN_ELFNOTE_CRASH_INFO (0x1000001) + +struct pt_load_segment { + off_t file_offset; + off_t file_size; + unsigned long long phys_start; + unsigned long long phys_end; + unsigned long long virt_start; + unsigned long long virt_end; +}; + +static int nr_cpus; /* number of cpu */ +static off_t max_file_offset; + +/* + * File information about /proc/vmcore: + */ +static int fd_memory; +static char *name_memory; + +static int flags_memory; +#define MEMORY_ELF64 (1 << 0) +#define MEMORY_XEN (1 << 1) + +/* + * PT_LOAD information about /proc/vmcore: + */ +static unsigned int num_pt_loads; +static struct pt_load_segment *pt_loads; +static off_t offset_pt_load_memory; + +/* + * PT_NOTE information about /proc/vmcore: + */ +static off_t offset_pt_note_memory; +static unsigned long size_pt_note_memory; + +/* + * vmcoreinfo in /proc/vmcore: + */ +static off_t offset_vmcoreinfo; +static unsigned long size_vmcoreinfo; +static off_t offset_vmcoreinfo_xen; +static unsigned long size_vmcoreinfo_xen; + +/* + * erased information in /proc/vmcore: + */ +static off_t offset_eraseinfo; +static unsigned long size_eraseinfo; + +/* + * Xen information: + */ +static off_t offset_xen_crash_info; +static unsigned long size_xen_crash_info; + + +/* + * Internal functions. 
+ */ +static int +check_elf_format(int fd, char *filename, int *phnum, unsigned int *num_load) +{ + int i; + Elf64_Ehdr ehdr64; + Elf64_Phdr load64; + Elf32_Ehdr ehdr32; + Elf32_Phdr load32; + + if (lseek(fd, 0, SEEK_SET) < 0) { + ERRMSG("Can't seek %s. %s\n", filename, strerror(errno)); + return FALSE; + } + if (read(fd, &ehdr64, sizeof(Elf64_Ehdr)) != sizeof(Elf64_Ehdr)) { + ERRMSG("Can't read %s. %s\n", filename, strerror(errno)); + return FALSE; + } + if (lseek(fd, 0, SEEK_SET) < 0) { + ERRMSG("Can't seek %s. %s\n", filename, strerror(errno)); + return FALSE; + } + if (read(fd, &ehdr32, sizeof(Elf32_Ehdr)) != sizeof(Elf32_Ehdr)) { + ERRMSG("Can't read %s. %s\n", filename, strerror(errno)); + return FALSE; + } + (*num_load) = 0; + if ((ehdr64.e_ident[EI_CLASS] == ELFCLASS64) + && (ehdr32.e_ident[EI_CLASS] != ELFCLASS32)) { + (*phnum) = ehdr64.e_phnum; + for (i = 0; i < ehdr64.e_phnum; i++) { + if (!get_elf64_phdr(fd, filename, i, &load64)) { + ERRMSG("Can't find Phdr %d.\n", i); + return FALSE; + } + if (load64.p_type == PT_LOAD) + (*num_load)++; + } + return ELF64; + + } else if ((ehdr64.e_ident[EI_CLASS] != ELFCLASS64) + && (ehdr32.e_ident[EI_CLASS] == ELFCLASS32)) { + (*phnum) = ehdr32.e_phnum; + for (i = 0; i < ehdr32.e_phnum; i++) { + if (!get_elf32_phdr(fd, filename, i, &load32)) { + ERRMSG("Can't find Phdr %d.\n", i); + return FALSE; + } + if (load32.p_type == PT_LOAD) + (*num_load)++; + } + return ELF32; + } + ERRMSG("Can't get valid ehdr.\n"); + return FALSE; +} + +static int +dump_Elf_load(Elf64_Phdr *prog, int num_load) +{ + struct pt_load_segment *pls; + + if (prog->p_type != PT_LOAD) { + ERRMSG("Not PT_LOAD.\n"); + return FALSE; + } + + pls = &pt_loads[num_load]; + pls->phys_start = prog->p_paddr; + pls->phys_end = pls->phys_start + prog->p_memsz; + pls->virt_start = prog->p_vaddr; + pls->virt_end = pls->virt_start + prog->p_memsz; + pls->file_offset = prog->p_offset; + pls->file_size = prog->p_filesz; + + DEBUG_MSG("LOAD (%d)\n", num_load); + 
DEBUG_MSG(" phys_start : %llx\n", pls->phys_start); + DEBUG_MSG(" phys_end : %llx\n", pls->phys_end); + DEBUG_MSG(" virt_start : %llx\n", pls->virt_start); + DEBUG_MSG(" virt_end : %llx\n", pls->virt_end); + + return TRUE; +} + +static off_t +offset_next_note(void *note) +{ + off_t offset; + Elf64_Nhdr *note64; + Elf32_Nhdr *note32; + + /* + * Both name and desc in ELF Note elements are padded to + * 4 byte boundary. + */ + if (is_elf64_memory()) { + note64 = (Elf64_Nhdr *)note; + offset = sizeof(Elf64_Nhdr) + + roundup(note64->n_namesz, 4) + + roundup(note64->n_descsz, 4); + } else { + note32 = (Elf32_Nhdr *)note; + offset = sizeof(Elf32_Nhdr) + + roundup(note32->n_namesz, 4) + + roundup(note32->n_descsz, 4); + } + return offset; +} + +static int +note_type(void *note) +{ + int type; + Elf64_Nhdr *note64; + Elf32_Nhdr *note32; + + if (is_elf64_memory()) { + note64 = (Elf64_Nhdr *)note; + type = note64->n_type; + } else { + note32 = (Elf32_Nhdr *)note; + type = note32->n_type; + } + return type; +} + +static int +note_namesz(void *note) +{ + int size; + Elf64_Nhdr *note64; + Elf32_Nhdr *note32; + + if (is_elf64_memory()) { + note64 = (Elf64_Nhdr *)note; + size = note64->n_namesz; + } else { + note32 = (Elf32_Nhdr *)note; + size = note32->n_namesz; + } + return size; +} + +static int +note_descsz(void *note) +{ + int size; + Elf64_Nhdr *note64; + Elf32_Nhdr *note32; + + if (is_elf64_memory()) { + note64 = (Elf64_Nhdr *)note; + size = note64->n_descsz; + } else { + note32 = (Elf32_Nhdr *)note; + size = note32->n_descsz; + } + return size; +} + +static off_t +offset_note_desc(void *note) +{ + off_t offset; + Elf64_Nhdr *note64; + Elf32_Nhdr *note32; + + if (is_elf64_memory()) { + note64 = (Elf64_Nhdr *)note; + offset = sizeof(Elf64_Nhdr) + roundup(note64->n_namesz, 4); + } else { + note32 = (Elf32_Nhdr *)note; + offset = sizeof(Elf32_Nhdr) + roundup(note32->n_namesz, 4); + } + return offset; +} + +static int +get_pt_note_info(void) +{ + int n_type, size_name, 
size_desc; + off_t offset, offset_desc; + char buf[VMCOREINFO_XEN_NOTE_NAME_BYTES]; + char note[MAX_SIZE_NHDR]; + + nr_cpus = 0; + offset = offset_pt_note_memory; + while (offset < offset_pt_note_memory + size_pt_note_memory) { + if (lseek(fd_memory, offset, SEEK_SET) < 0) { + ERRMSG("Can't seek the dump memory(%s). %s\n", + name_memory, strerror(errno)); + return FALSE; + } + if (read(fd_memory, note, sizeof(note)) != sizeof(note)) { + ERRMSG("Can't read the dump memory(%s). %s\n", + name_memory, strerror(errno)); + return FALSE; + } + + n_type = note_type(note); + size_name = note_namesz(note); + size_desc = note_descsz(note); + offset_desc = offset + offset_note_desc(note); + + if (!size_name || size_name > sizeof(buf)) + goto next_note; + + if (read(fd_memory, &buf, sizeof(buf)) != sizeof(buf)) { + ERRMSG("Can't read the dump memory(%s). %s\n", + name_memory, strerror(errno)); + return FALSE; + } + + if (!strncmp(KEXEC_CORE_NOTE_NAME, buf, + KEXEC_CORE_NOTE_NAME_BYTES)) { + if (n_type == NT_PRSTATUS) { + nr_cpus++; + } + + } else if (!strncmp(VMCOREINFO_NOTE_NAME, buf, + VMCOREINFO_NOTE_NAME_BYTES)) { + if (n_type == 0) { + set_vmcoreinfo(offset_desc, size_desc); + } + /* + * Check whether /proc/vmcore contains vmcoreinfo, + * and get both the offset and the size. + */ + } else if (!strncmp(VMCOREINFO_XEN_NOTE_NAME, buf, + VMCOREINFO_XEN_NOTE_NAME_BYTES)) { + if (n_type == 0) { + offset_vmcoreinfo_xen = offset_desc; + size_vmcoreinfo_xen = size_desc; + } + /* + * Check whether /proc/vmcore contains xen's note. + */ + } else if (!strncmp("Xen", buf, 4)) { + if (n_type == XEN_ELFNOTE_CRASH_INFO) { + flags_memory |= MEMORY_XEN; + offset_xen_crash_info = offset_desc; + size_xen_crash_info = size_desc; + } + /* + * Check whether a source dumpfile contains eraseinfo. + * /proc/vmcore does not contain eraseinfo, because eraseinfo + * is added only by makedumpfile and makedumpfile does not + * create /proc/vmcore. 
+ */ + } else if (!strncmp(ERASEINFO_NOTE_NAME, buf, + ERASEINFO_NOTE_NAME_BYTES)) { + if (n_type == 0) { + set_eraseinfo(offset_desc, size_desc); + } + } + + next_note: + offset += offset_next_note(note); + } + if (is_xen_memory()) + DEBUG_MSG("Xen kdump\n"); + else + DEBUG_MSG("Linux kdump\n"); + + return TRUE; +} + +#define UNINITIALIZED ((ulong)(-1)) +int set_kcore_vmcoreinfo(uint64_t vmcoreinfo_addr, uint64_t vmcoreinfo_len) +{ + int i; + ulong kvaddr; + off_t offset; + char note[MAX_SIZE_NHDR]; + int size_desc; + off_t offset_desc; + + offset = UNINITIALIZED; + kvaddr = (ulong)vmcoreinfo_addr + PAGE_OFFSET; + + for (i = 0; i < num_pt_loads; ++i) { + struct pt_load_segment *p = &pt_loads[i]; + if ((kvaddr >= p->virt_start) && (kvaddr < p->virt_end)) { + offset = (off_t)(kvaddr - p->virt_start) + + (off_t)p->file_offset; + break; + } + } + + if (offset == UNINITIALIZED) { + ERRMSG("Can't get the offset of VMCOREINFO(%s). %s\n", + name_memory, strerror(errno)); + return FALSE; + } + + if (lseek(fd_memory, offset, SEEK_SET) != offset) { + ERRMSG("Can't seek the dump memory(%s). %s\n", + name_memory, strerror(errno)); + return FALSE; + } + + if (read(fd_memory, note, MAX_SIZE_NHDR) != MAX_SIZE_NHDR) { + ERRMSG("Can't read the dump memory(%s). %s\n", + name_memory, strerror(errno)); + return FALSE; + } + + size_desc = note_descsz(note); + offset_desc = offset + offset_note_desc(note); + + set_vmcoreinfo(offset_desc, size_desc); + + return TRUE; +} + +/* + * External functions. + */ +int +get_elf64_phdr(int fd, char *filename, int index, Elf64_Phdr *phdr) +{ + off_t offset; + + offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * index; + + if (lseek(fd, offset, SEEK_SET) < 0) { + ERRMSG("Can't seek %s. %s\n", filename, strerror(errno)); + return FALSE; + } + if (read(fd, phdr, sizeof(Elf64_Phdr)) != sizeof(Elf64_Phdr)) { + ERRMSG("Can't read %s. 
%s\n", filename, strerror(errno)); + return FALSE; + } + return TRUE; +} + +int +get_elf32_phdr(int fd, char *filename, int index, Elf32_Phdr *phdr) +{ + off_t offset; + + offset = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * index; + + if (lseek(fd, offset, SEEK_SET) < 0) { + ERRMSG("Can't seek %s. %s\n", filename, strerror(errno)); + return FALSE; + } + if (read(fd, phdr, sizeof(Elf32_Phdr)) != sizeof(Elf32_Phdr)) { + ERRMSG("Can't read %s. %s\n", filename, strerror(errno)); + return FALSE; + } + return TRUE; +} + +/* + * Convert Physical Address to File Offset. + * If this function returns 0x0, File Offset isn't found. + * The File Offset 0x0 is in the ELF header. + * It is not in the memory image. + */ +off_t +paddr_to_offset(unsigned long long paddr) +{ + int i; + off_t offset; + struct pt_load_segment *pls; + + for (i = offset = 0; i < num_pt_loads; i++) { + pls = &pt_loads[i]; + if ((paddr >= pls->phys_start) + && (paddr < pls->phys_start + pls->file_size)) { + offset = (off_t)(paddr - pls->phys_start) + + pls->file_offset; + break; + } + } + return offset; +} + +/* + * Same as paddr_to_offset() but makes sure that the specified offset (hint) + * in the segment. + */ +off_t +paddr_to_offset2(unsigned long long paddr, off_t hint) +{ + int i; + off_t offset; + struct pt_load_segment *pls; + + for (i = offset = 0; i < num_pt_loads; i++) { + pls = &pt_loads[i]; + if ((paddr >= pls->phys_start) + && (paddr < pls->phys_start + pls->file_size) + && (hint >= pls->file_offset) + && (hint < pls->file_offset + pls->file_size)) { + offset = (off_t)(paddr - pls->phys_start) + + pls->file_offset; + break; + } + } + return offset; +} + +unsigned long long +page_head_to_phys_start(unsigned long long head_paddr) +{ + int i; + struct pt_load_segment *pls; + + for (i = 0; i < num_pt_loads; i++) { + pls = &pt_loads[i]; + if ((pls->phys_start <= head_paddr + info->page_size) + && (head_paddr < pls->phys_end)) { + return (pls->phys_start > head_paddr) ? 
+ pls->phys_start : head_paddr; + } + } + + return 0; +} + +unsigned long long +page_head_to_phys_end(unsigned long long head_paddr) +{ + int i; + struct pt_load_segment *pls; + + for (i = 0; i < num_pt_loads; i++) { + pls = &pt_loads[i]; + if ((pls->phys_start <= head_paddr + info->page_size) + && (head_paddr < pls->phys_end)) { + return (pls->phys_end < head_paddr + info->page_size) ? + pls->phys_end : head_paddr + info->page_size; + } + } + + return 0; +} + +/* + * Calculate a start File Offset of PT_LOAD from a File Offset + * of a page. If this function returns 0x0, the input page is + * not in the memory image. + */ +off_t +offset_to_pt_load_start(off_t offset) +{ + int i; + off_t pt_load_start; + struct pt_load_segment *pls; + + for (i = pt_load_start = 0; i < num_pt_loads; i++) { + pls = &pt_loads[i]; + if ((offset >= pls->file_offset) + && (offset < pls->file_offset + + (pls->phys_end - pls->phys_start))) { + pt_load_start = pls->file_offset; + break; + } + } + return pt_load_start; +} + +/* + * Calculate a end File Offset of PT_LOAD from a File Offset + * of a page. If this function returns 0x0, the input page is + * not in the memory image. + */ +off_t +offset_to_pt_load_end(off_t offset) +{ + int i; + off_t pt_load_end; + struct pt_load_segment *pls; + + for (i = pt_load_end = 0; i < num_pt_loads; i++) { + pls = &pt_loads[i]; + if ((offset >= pls->file_offset) + && (offset < pls->file_offset + + (pls->phys_end - pls->phys_start))) { + pt_load_end = (off_t)(pls->file_offset + + (pls->phys_end - pls->phys_start)); + break; + } + } + return pt_load_end; +} + +/* + * Judge whether the page is fractional or not. 
+ */ +int +page_is_fractional(off_t page_offset) +{ + if (page_offset % info->page_size != 0) + return TRUE; + + if (offset_to_pt_load_end(page_offset) - page_offset + < info->page_size) + return TRUE; + + return FALSE; +} + +unsigned long long +vaddr_to_paddr_general(unsigned long long vaddr) +{ + int i; + unsigned long long paddr = NOT_PADDR; + struct pt_load_segment *pls; + + if (pt_loads == NULL) + return NOT_PADDR; + + for (i = 0; i < num_pt_loads; i++) { + pls = &pt_loads[i]; + if ((vaddr >= pls->virt_start) + && (vaddr < pls->virt_end)) { + paddr = (off_t)(vaddr - pls->virt_start) + + pls->phys_start; + break; + } + } + return paddr; +} + +/* + * This function is slow because it doesn't use the memory. + * It is useful at few calls like get_str_osrelease_from_vmlinux(). + */ +off_t +vaddr_to_offset_slow(int fd, char *filename, unsigned long long vaddr) +{ + off_t offset = 0; + int i, phnum, flag_elf64, elf_format; + unsigned int num_load; + Elf64_Phdr load64; + Elf32_Phdr load32; + + elf_format = check_elf_format(fd, filename, &phnum, &num_load); + if (elf_format == ELF64) + flag_elf64 = TRUE; + else if (elf_format == ELF32) + flag_elf64 = FALSE; + else + return 0; + + for (i = 0; i < phnum; i++) { + if (flag_elf64) { /* ELF64 */ + if (!get_elf64_phdr(fd, filename, i, &load64)) { + ERRMSG("Can't find Phdr %d.\n", i); + return 0; + } + if (load64.p_type != PT_LOAD) + continue; + + if ((vaddr < load64.p_vaddr) + || (load64.p_vaddr + load64.p_filesz <= vaddr)) + continue; + + offset = load64.p_offset + (vaddr - load64.p_vaddr); + break; + } else { /* ELF32 */ + if (!get_elf32_phdr(fd, filename, i, &load32)) { + ERRMSG("Can't find Phdr %d.\n", i); + return 0; + } + if (load32.p_type != PT_LOAD) + continue; + + if ((vaddr < load32.p_vaddr) + || (load32.p_vaddr + load32.p_filesz <= vaddr)) + continue; + + offset = load32.p_offset + (vaddr - load32.p_vaddr); + break; + } + } + return offset; +} + +unsigned long long +get_max_paddr(void) +{ + int i; + unsigned long 
long max_paddr = 0; + struct pt_load_segment *pls; + + for (i = 0; i < num_pt_loads; i++) { + pls = &pt_loads[i]; + if (max_paddr < pls->phys_end) + max_paddr = pls->phys_end; + } + return max_paddr; +} + +/* + * Find the LOAD segment which is closest to the requested + * physical address within a given distance. + * If there is no such segment, return a negative number. + */ +int +closest_pt_load(unsigned long long paddr, unsigned long distance) +{ + int i, bestidx; + struct pt_load_segment *pls; + unsigned long bestdist; + + bestdist = distance; + bestidx = -1; + for (i = 0; i < num_pt_loads; ++i) { + pls = &pt_loads[i]; + if (paddr >= pls->phys_end) + continue; + if (paddr >= pls->phys_start) + return i; /* Exact match */ + if (bestdist > pls->phys_start - paddr) { + bestdist = pls->phys_start - paddr; + bestidx = i; + } + } + return bestidx; +} + +int +get_elf64_ehdr(int fd, char *filename, Elf64_Ehdr *ehdr) +{ + if (lseek(fd, 0, SEEK_SET) < 0) { + ERRMSG("Can't seek %s. %s\n", filename, strerror(errno)); + return FALSE; + } + if (read(fd, ehdr, sizeof(Elf64_Ehdr)) != sizeof(Elf64_Ehdr)) { + ERRMSG("Can't read %s. %s\n", filename, strerror(errno)); + return FALSE; + } + if (ehdr->e_ident[EI_CLASS] != ELFCLASS64) { + ERRMSG("Can't get valid e_ident.\n"); + return FALSE; + } + return TRUE; +} + +int +get_elf32_ehdr(int fd, char *filename, Elf32_Ehdr *ehdr) +{ + if (lseek(fd, 0, SEEK_SET) < 0) { + ERRMSG("Can't seek %s. %s\n", filename, strerror(errno)); + return FALSE; + } + if (read(fd, ehdr, sizeof(Elf32_Ehdr)) != sizeof(Elf32_Ehdr)) { + ERRMSG("Can't read %s. %s\n", filename, strerror(errno)); + return FALSE; + } + if (ehdr->e_ident[EI_CLASS] != ELFCLASS32) { + ERRMSG("Can't get valid e_ident.\n"); + return FALSE; + } + return TRUE; +} + +int +get_elf_loads(int fd, char *filename) +{ + int i, j, phnum, elf_format; + Elf64_Phdr phdr; + + /* + * Check ELF64 or ELF32. 
+ */ + elf_format = check_elf_format(fd, filename, &phnum, &num_pt_loads); + if (elf_format == ELF64) + flags_memory |= MEMORY_ELF64; + else if (elf_format != ELF32) + return FALSE; + + if (!num_pt_loads) { + ERRMSG("Can't get the number of PT_LOAD.\n"); + return FALSE; + } + + /* + * The below file information will be used as /proc/vmcore. + */ + fd_memory = fd; + name_memory = filename; + + pt_loads = calloc(sizeof(struct pt_load_segment), num_pt_loads); + if (pt_loads == NULL) { + ERRMSG("Can't allocate memory for the PT_LOAD. %s\n", + strerror(errno)); + return FALSE; + } + for (i = 0, j = 0; i < phnum; i++) { + if (!get_phdr_memory(i, &phdr)) + return FALSE; + + if (phdr.p_type != PT_LOAD) + continue; + + if (j >= num_pt_loads) + return FALSE; + if (!dump_Elf_load(&phdr, j)) + return FALSE; + j++; + } + + return TRUE; +} + +static int exclude_segment(struct pt_load_segment **pt_loads, + unsigned int *num_pt_loads, uint64_t start, uint64_t end) +{ + int i, j, tidx = -1; + unsigned long long vstart, vend, kvstart, kvend; + struct pt_load_segment temp_seg = {0}; + kvstart = (ulong)start + PAGE_OFFSET; + kvend = (ulong)end + PAGE_OFFSET; + unsigned long size; + + for (i = 0; i < (*num_pt_loads); i++) { + vstart = (*pt_loads)[i].virt_start; + vend = (*pt_loads)[i].virt_end; + if (kvstart < vend && kvend > vstart) { + if (kvstart != vstart && kvend != vend) { + /* Split load segment */ + temp_seg.phys_start = end; + temp_seg.phys_end = (*pt_loads)[i].phys_end; + temp_seg.virt_start = kvend; + temp_seg.virt_end = vend; + temp_seg.file_offset = (*pt_loads)[i].file_offset + + temp_seg.virt_start - (*pt_loads)[i].virt_start; + temp_seg.file_size = temp_seg.phys_end + - temp_seg.phys_start; + + (*pt_loads)[i].virt_end = kvstart; + (*pt_loads)[i].phys_end = start; + (*pt_loads)[i].file_size -= temp_seg.file_size; + + tidx = i+1; + } else if (kvstart != vstart) { + (*pt_loads)[i].phys_end = start; + (*pt_loads)[i].virt_end = kvstart; + } else { + (*pt_loads)[i].phys_start 
= end; + (*pt_loads)[i].virt_start = kvend; + } + (*pt_loads)[i].file_size -= (end -start); + } + } + /* Insert split load segment, if any. */ + if (tidx >= 0) { + size = (*num_pt_loads + 1) * sizeof((*pt_loads)[0]); + (*pt_loads) = realloc((*pt_loads), size); + if (!(*pt_loads)) { + ERRMSG("Cannot realloc %ld bytes: %s\n", + size + 0UL, strerror(errno)); + exit(1); + } + for (j = (*num_pt_loads - 1); j >= tidx; j--) + (*pt_loads)[j+1] = (*pt_loads)[j]; + (*pt_loads)[tidx] = temp_seg; + (*num_pt_loads)++; + } + return 0; +} + + +int get_kcore_dump_loads(void) +{ + struct pt_load_segment *pls; + int i, j, loads = 0; + + for (i = 0; i < num_pt_loads; ++i) { + struct pt_load_segment *p = &pt_loads[i]; + if (p->phys_start == NOT_PADDR + || !is_phys_addr(p->virt_start)) + continue; + loads++; + } + + if (!loads) { + ERRMSG("Can't get the correct number of PT_LOAD. %s\n", + strerror(errno)); + return FALSE; + } + + pls = calloc(sizeof(struct pt_load_segment), loads); + if (pls == NULL) { + ERRMSG("Can't allocate memory for the PT_LOAD. 
%s\n", + strerror(errno)); + return FALSE; + } + + for (i = 0, j = 0; i < num_pt_loads; ++i) { + struct pt_load_segment *p = &pt_loads[i]; + if (p->phys_start == NOT_PADDR + || !is_phys_addr(p->virt_start)) + continue; + if (j >= loads) { + free(pls); + return FALSE; + } + + if (j == 0) { + offset_pt_load_memory = p->file_offset; + if (offset_pt_load_memory == 0) { + ERRMSG("Can't get the offset of page data.\n"); + free(pls); + return FALSE; + } + } + + pls[j] = *p; + j++; + } + + free(pt_loads); + pt_loads = pls; + num_pt_loads = loads; + + for (i = 0; i < crash_reserved_mem_nr; i++) { + exclude_segment(&pt_loads, &num_pt_loads, + crash_reserved_mem[i].start, crash_reserved_mem[i].end + 1); + } + + max_file_offset = 0; + for (i = 0; i < num_pt_loads; ++i) { + struct pt_load_segment *p = &pt_loads[i]; + max_file_offset = MAX(max_file_offset, + p->file_offset + p->phys_end - p->phys_start); + } + + for (i = 0; i < num_pt_loads; ++i) { + struct pt_load_segment *p = &pt_loads[i]; + DEBUG_MSG("LOAD (%d)\n", i); + DEBUG_MSG(" phys_start : %llx\n", p->phys_start); + DEBUG_MSG(" phys_end : %llx\n", p->phys_end); + DEBUG_MSG(" virt_start : %llx\n", p->virt_start); + DEBUG_MSG(" virt_end : %llx\n", p->virt_end); + } + + return TRUE; +} + +/* + * Get ELF information about /proc/vmcore. + */ +int +get_elf_info(int fd, char *filename) +{ + int i, j, phnum, elf_format; + Elf64_Phdr phdr; + + /* + * Check ELF64 or ELF32. + */ + elf_format = check_elf_format(fd, filename, &phnum, &num_pt_loads); + if (elf_format == ELF64) + flags_memory |= MEMORY_ELF64; + else if (elf_format != ELF32) + return FALSE; + + if (!num_pt_loads) { + ERRMSG("Can't get the number of PT_LOAD.\n"); + return FALSE; + } + + /* + * The below file information will be used as /proc/vmcore. + */ + fd_memory = fd; + name_memory = filename; + + pt_loads = calloc(sizeof(struct pt_load_segment), num_pt_loads); + if (pt_loads == NULL) { + ERRMSG("Can't allocate memory for the PT_LOAD. 
%s\n", + strerror(errno)); + return FALSE; + } + for (i = 0, j = 0; i < phnum; i++) { + if (!get_phdr_memory(i, &phdr)) + return FALSE; + + if (phdr.p_type == PT_NOTE) { + set_pt_note(phdr.p_offset, phdr.p_filesz); + } + if (phdr.p_type != PT_LOAD) + continue; + + if (j == 0) { + offset_pt_load_memory = phdr.p_offset; + if (offset_pt_load_memory == 0) { + ERRMSG("Can't get the offset of page data.\n"); + return FALSE; + } + } + if (j >= num_pt_loads) + return FALSE; + if(!dump_Elf_load(&phdr, j)) + return FALSE; + j++; + } + max_file_offset = 0; + for (i = 0; i < num_pt_loads; ++i) { + struct pt_load_segment *p = &pt_loads[i]; + max_file_offset = MAX(max_file_offset, + p->file_offset + p->phys_end - p->phys_start); + } + if (!has_pt_note()) { + ERRMSG("Can't find PT_NOTE Phdr.\n"); + return FALSE; + } + if (!get_pt_note_info()) { + ERRMSG("Can't get PT_NOTE information.\n"); + return FALSE; + } + return TRUE; +} + +void +free_elf_info(void) +{ + free(pt_loads); + pt_loads = NULL; +} + +int +is_elf64_memory(void) +{ + return (flags_memory & MEMORY_ELF64); +} + +int +is_xen_memory(void) +{ + return (flags_memory & MEMORY_XEN); +} + +int +get_phnum_memory(void) +{ + int phnum; + Elf64_Ehdr ehdr64; + Elf32_Ehdr ehdr32; + + if (is_elf64_memory()) { /* ELF64 */ + if (!get_elf64_ehdr(fd_memory, name_memory, &ehdr64)) { + ERRMSG("Can't get ehdr64.\n"); + return FALSE; + } + phnum = ehdr64.e_phnum; + } else { /* ELF32 */ + if (!get_elf32_ehdr(fd_memory, name_memory, &ehdr32)) { + ERRMSG("Can't get ehdr32.\n"); + return FALSE; + } + phnum = ehdr32.e_phnum; + } + return phnum; +} + +int +get_phdr_memory(int index, Elf64_Phdr *phdr) +{ + Elf32_Phdr phdr32; + + if (is_elf64_memory()) { /* ELF64 */ + if (!get_elf64_phdr(fd_memory, name_memory, index, phdr)) { + ERRMSG("Can't find Phdr %d.\n", index); + return FALSE; + } + } else { + if (!get_elf32_phdr(fd_memory, name_memory, index, &phdr32)) { + ERRMSG("Can't find Phdr %d.\n", index); + return FALSE; + } + memset(phdr, 0, 
sizeof(Elf64_Phdr)); + phdr->p_type = phdr32.p_type; + phdr->p_flags = phdr32.p_flags; + phdr->p_offset = phdr32.p_offset; + phdr->p_vaddr = phdr32.p_vaddr; + phdr->p_paddr = phdr32.p_paddr; + phdr->p_filesz = phdr32.p_filesz; + phdr->p_memsz = phdr32.p_memsz; + phdr->p_align = phdr32.p_align; + } + return TRUE; +} + +off_t +get_offset_pt_load_memory(void) +{ + return offset_pt_load_memory; +} + +int +get_pt_load(int idx, + unsigned long long *phys_start, + unsigned long long *phys_end, + unsigned long long *virt_start, + unsigned long long *virt_end) +{ + struct pt_load_segment *pls; + + if (num_pt_loads <= idx) + return FALSE; + + pls = &pt_loads[idx]; + + if (phys_start) + *phys_start = pls->phys_start; + if (phys_end) + *phys_end = pls->phys_end; + if (virt_start) + *virt_start = pls->virt_start; + if (virt_end) + *virt_end = pls->virt_end; + + return TRUE; +} + +int +get_pt_load_extents(int idx, + unsigned long long *phys_start, + unsigned long long *phys_end, + off_t *file_offset, + off_t *file_size) +{ + struct pt_load_segment *pls; + + if (num_pt_loads <= idx) + return FALSE; + + pls = &pt_loads[idx]; + + if (phys_start) + *phys_start = pls->phys_start; + if (phys_end) + *phys_end = pls->phys_end; + if (file_offset) + *file_offset = pls->file_offset; + if (file_size) + *file_size = pls->file_size; + + return TRUE; +} + +unsigned int +get_num_pt_loads(void) +{ + return num_pt_loads; +} + +void +set_nr_cpus(int num) +{ + nr_cpus = num; +} + +int +get_nr_cpus(void) +{ + return nr_cpus; +} + +int +has_pt_note(void) +{ + if (info->flag_sadump) { + if (size_pt_note_memory) + return TRUE; + } else if (offset_pt_note_memory && size_pt_note_memory) + return TRUE; + return FALSE; +} + +void +set_pt_note(off_t offset, unsigned long size) +{ + offset_pt_note_memory = offset; + size_pt_note_memory = size; +} + +void +get_pt_note(off_t *offset, unsigned long *size) +{ + if (offset) + *offset = offset_pt_note_memory; + if (size) + *size = size_pt_note_memory; +} + +int 
+has_vmcoreinfo(void) +{ + if (offset_vmcoreinfo && size_vmcoreinfo) + return TRUE; + return FALSE; +} + +void +set_vmcoreinfo(off_t offset, unsigned long size) +{ + offset_vmcoreinfo = offset; + size_vmcoreinfo = size; +} + +void +get_vmcoreinfo(off_t *offset, unsigned long *size) +{ + if (offset) + *offset = offset_vmcoreinfo; + if (size) + *size = size_vmcoreinfo; +} + +int +has_vmcoreinfo_xen(void) +{ + if (offset_vmcoreinfo_xen && size_vmcoreinfo_xen) + return TRUE; + return FALSE; +} + +void +get_vmcoreinfo_xen(off_t *offset, unsigned long *size) +{ + if (offset) + *offset = offset_vmcoreinfo_xen; + if (size) + *size = size_vmcoreinfo_xen; +} + +void +get_xen_crash_info(off_t *offset, unsigned long *size) +{ + if (offset) + *offset = offset_xen_crash_info; + if (size) + *size = size_xen_crash_info; +} + +int +has_eraseinfo(void) +{ + if (offset_eraseinfo && size_eraseinfo) + return TRUE; + return FALSE; +} + +void +get_eraseinfo(off_t *offset, unsigned long *size) +{ + if (offset) + *offset = offset_eraseinfo; + if (size) + *size = size_eraseinfo; +} + +void +set_eraseinfo(off_t offset, unsigned long size) +{ + offset_eraseinfo = offset; + size_eraseinfo = size; +} + +off_t +get_max_file_offset(void) +{ + return max_file_offset; +} diff --git a/elf_info.h b/elf_info.h new file mode 100644 index 0000000..cd4ffa6 --- /dev/null +++ b/elf_info.h @@ -0,0 +1,95 @@ +/* + * elf_info.h + * + * Copyright (C) 2011 NEC Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ +#ifndef _ELF_INFO_H +#define _ELF_INFO_H + +#include <elf.h> +#include <sys/types.h> + +#define KEXEC_CORE_NOTE_NAME "CORE" +#define KEXEC_CORE_NOTE_NAME_BYTES sizeof(KEXEC_CORE_NOTE_NAME) + +#define ERASEINFO_NOTE_NAME "ERASEINFO" +#define ERASEINFO_NOTE_NAME_BYTES (sizeof(ERASEINFO_NOTE_NAME)) + +#define MAX_SIZE_NHDR MAX(sizeof(Elf64_Nhdr), sizeof(Elf32_Nhdr)) + +int get_elf64_phdr(int fd, char *filename, int index, Elf64_Phdr *phdr); +int get_elf32_phdr(int fd, char *filename, int index, Elf32_Phdr *phdr); + +off_t paddr_to_offset(unsigned long long paddr); +off_t paddr_to_offset2(unsigned long long paddr, off_t hint); +unsigned long long page_head_to_phys_start(unsigned long long head_paddr); +unsigned long long page_head_to_phys_end(unsigned long long head_paddr); +off_t offset_to_pt_load_start(off_t offset); +off_t offset_to_pt_load_end(off_t offset); +unsigned long long vaddr_to_paddr_general(unsigned long long vaddr); +off_t vaddr_to_offset_slow(int fd, char *filename, unsigned long long vaddr); +unsigned long long get_max_paddr(void); +int closest_pt_load(unsigned long long paddr, unsigned long distance); + +int page_is_fractional(off_t page_offset); + +int get_elf64_ehdr(int fd, char *filename, Elf64_Ehdr *ehdr); +int get_elf32_ehdr(int fd, char *filename, Elf32_Ehdr *ehdr); +int get_elf_info(int fd, char *filename); +void free_elf_info(void); +int get_elf_loads(int fd, char *filename); +int set_kcore_vmcoreinfo(uint64_t vmcoreinfo_addr, uint64_t vmcoreinfo_len); +int get_kcore_dump_loads(void); + +int is_elf64_memory(void); +int is_xen_memory(void); + +int get_phnum_memory(void); +int get_phdr_memory(int index, Elf64_Phdr *phdr); +off_t get_offset_pt_load_memory(void); +int get_pt_load(int idx, + unsigned long long *phys_start, + unsigned long long *phys_end, + unsigned long long *virt_start, + unsigned long long *virt_end); +int get_pt_load_extents(int idx, + unsigned long long *phys_start, + unsigned long long *phys_end, + off_t *file_offset, + 
off_t *file_size); +unsigned int get_num_pt_loads(void); + +void set_nr_cpus(int num); +int get_nr_cpus(void); + +int has_pt_note(void); +void set_pt_note(off_t offset, unsigned long size); +void get_pt_note(off_t *offset, unsigned long *size); + +int has_vmcoreinfo(void); +void set_vmcoreinfo(off_t offset, unsigned long size); +void get_vmcoreinfo(off_t *offset, unsigned long *size); + +int has_vmcoreinfo_xen(void); +void get_vmcoreinfo_xen(off_t *offset, unsigned long *size); +void get_xen_crash_info(off_t *offset, unsigned long *size); + +int has_eraseinfo(void); +void get_eraseinfo(off_t *offset, unsigned long *size); +void set_eraseinfo(off_t offset, unsigned long size); + +off_t get_max_file_offset(void); + +#endif /* ELF_INFO_H */ + + diff --git a/eppic_scripts/README b/eppic_scripts/README new file mode 100644 index 0000000..3038d15 --- /dev/null +++ b/eppic_scripts/README @@ -0,0 +1,300 @@ +=================================== + Eppic scripts README +================================== + +These eppic scripts are based on the fedora 19 kernel. eppic scripts +naming convention follows the format: +<eppic_script name>-<valid from kernel version>_to_<valid until kernel version>.c + +For example consider dir_names_3_10_to_3_13.c. This script would scrub +sensitive information successfully, when run against kernel version >= 3.10 +and kernel version <= 3.13. + +1. Eppic script: proc_names_3_10_to_4_8.c + Description: Scrubs executable name of each user process + + Explanation: + Walks all processes via the tasks lists starting from init_task + + extern struct task_struct init_task; + + struct task_struct { + ... + struct list_head tasks; + ... + char comm[TASK_COMM_LEN]; /* executable name excluding path */ + ... + }; + + For each user space process clear executable name + + struct task_struct *tsk; + list_for_each_entry(tsk, &init_task, tasks) { + if (tsk->mm) + memset(tsk->comm, 0, TASK_COMM_LEN); + } + + +2. 
Eppic script: dir_names_3_10_to_3_13.c + Description: Scrubs filenames of cached dentries + + Explanation: + i) iterate over all mounted filesystems + + struct vfsmount { + struct list_head mnt_hash; + ... + struct dentry *mnt_root; /* root of the mounted tree */ + ... + }; + + for (u = 0; i < HASH_SIZE; u++) { + struct vfsmount *mnt; + list_for_each_entry(mnt, &mount_hashtable[u], mnt_hash) { + struct dentry *root; + root = mnt->mnt_root; + ... + } + } + + ii) recursively walk the dentries of each tree starting from root dentry + and clear d_name and d_iname + + struct dentry { + ... + struct qstr d_name; + ... + unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ + ... + struct list_head d_subdirs; /* our children */ + ... + }; + + void walk_dentries(struct dentry *dentry) + { + struct dentry *child; + memset(dentry->d_iname, 0, DNAME_INLINE_LEN); + memset(dentry->d_name.name, 0, dentry->d_name.len); + list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) + walk_dentries(child); + } + +3. Eppic script: keyring_3_10_to_4_3.c + Description: Scrubs all entries in the keyring + + Explanation: + Scan the keyring_name_hash hash table + + static struct list_head keyring_name_hash[KEYRING_NAME_HASH_SIZE]; + + for (i = 0; i < KEYRING_NAME_HASH_SIZE; i++) + if (!list_empty(&keyring_name_hash[i])) { + ... + } + + For each non-empty list walk all keyring entries + + struct key { + ... + struct key_type *type; /* type of key */ + ... + unsigned short datalen; /* payload data length */ + ... + union { + struct list_head link; + ... + } type_data; + ... + union { + unsigned long value; + void __rcu *rcudata; + void *data; + struct keyring_list __rcu *subscriptions; + } payload; + }; + + struct key *key; + list_for_each_entry(key, &keyring_name_hash[i], type_data.link) { + ... + } + + Clear value/rcudata/data dependent on the type of the key. + +4. 
Eppic script: ap_messages_3_10_to_4_8.c + Description: Clear the message data of all ap_bus requests + + Explanation: + Walk all devices in the LIST_HEAD(ap_device_list); + + struct ap_device { + ... + struct list_head list; /* private list of all AP devices. */ + ... + struct list_head pendingq; /* List of message sent to AP queue. */ + int pendingq_count; /* # requests on pendingq list. */ + struct list_head requestq; /* List of message yet to be sent. */ + int requestq_count; /* # requests on requestq list. */ + ... + }; + + struct ap_device *device; + list_for_each_entry(device, &ap_device_list, list) { + ... + } + + For each ap device walk the pendingq and requestq list + + struct ap_message { + struct list_head list; /* Request queueing. */ + ... + void *message; /* Pointer to message buffer. */ + size_t length; /* Message length. */ + ... + }; + + struct ap_message *apmsg; + list_for_each_entry(apmsg, &device->pendingq, list) { + ... + } + list_for_each_entry(apmsg, &device->requestq, list) { + ... + } + + For each message in pendingq and requestq clear the message + + memset(apmsg->message, 0, apmsg->length); + +5. Eppic script: tcp_sk_buf_3_10_to_4_8.c + Description: Scrub data in tcp socket buffers + + Explanation: + Find tcp domain sockets (struct sock *sk) + + tcp sockets: + + Iterate from 0 to INET_LHTABLE_SIZE and get inet_list_hashbucket from + tcp_hash_info.listening_hash[<index>] + + for (i = 0; i < INET_LHTABLE_SIZE; i++) { + struct inet_listen_hashbucket *ilb = &tcp_hashinfo.listening_hash[i]; + } + For each hash bucket iterate over ilb->head null list to get sockets: + struct sock *sk; + sk_nulls_for_each(sk, node, &ilb->head) { + ... + } + + + For each socket iterate over the socket buffers in + sk_receive_queue and sk_write_queue: + + struct sock { + ... + struct sk_buff_head sk_receive_queue; + ... + struct sk_buff_head sk_write_queue; + ... 
+ }; + + struct sk_buff_head { + struct sk_buff *next; + struct sk_buff *prev; + }; + + For each struct sk_buff in the two lists clear the memory referenced + by skb->data / skb->data_len: + + struct sk_buff { + ... + unsigned int data_len; + ... + unsigned char *data; + ... + }; + +6. Eppic script: udp_sk_buf_3_10_to_4_8.c + Description: Scrub data of udp socket buffers + + Explanation: + Find all udp sockets (struct sock *sk) + + udp sockets: + + Iterate from 0 to udp_table->mask and get udp_hslot from hash table: + for (i = 0; i < udp->table->mask; i++) { + struct udp_hslot *hslot = udp_table->hash[i]; + ... + } + + For each hslot iterate over hslot->head null list to get sockets: + struct sock *sk; + sk_nulls_for_each(sk, node, &hslot->head) { + ... + } + + For each socket iterate over the socket buffers in + sk_receive_queue and sk_write_queue. + + For each struct sk_buff in the two lists clear the memory referenced + by skb->data / skb->data_len. + +7. Eppic script: unix_sk_buff_3_10_to_4_8.c + Description: Scrub data of unix socket buffers + + Explanation: + Iterate from 0 to UNIX_HASH_SIZE and then walk the hlist in + for (i = 0; i < UNIX_HASH_SIZE; i++) { + struct list_head *list = &unix_socket_table[i]; + ... + } + + Walk each non-empty list in unix_socket_table + struct sock *sk; + sk_for_each(sk, node, &unix_socket_table[i]) + + For each socket iterate over the socket buffers in + sk_receive_queue and sk_write_queue. + + For each struct sk_buff in the two lists clear the memory referenced + by skb->data / skb->data_len. + +8. Eppic script: vhost_net_buffers_3_10_to_3_18.c + Description: Scrub socket buffers of guest network I/O + + Explanation: + Scrub socket buffers of guest network I/O + + vhost_net instance will be attached to the file's private data. + To get to the right file check the fdtable for each task, if the file + has registered its fops with vhost_net_open, if so we can retreive the + file's private data. 
+ + if (task->files->fdt->fd[i]->f_op->open == &vhost_net_open) + struct vhost_net *net = f->private_data; + + struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_MAX]; + struct vhost_virtqueue *vq = &nvq->vq; + struct socket *sock = vq->private_data; + struct sock *sk = sock->sk; + + struct sk_buff *next = sk->sk_receive_queue.next; + struct sk_buff *prev = sk->sk_receive_queue.prev; + + Scrub next->data till the end of the sk_receive_queue and + sk_write_queue list + + +9. Eppic script: vhost_scsi_buffers_3_10_to_4_8.c + Description: Scrub buffers involved in guest block I/O + + Explanation: + vhost_scsi instance will be attached to the file's private data. + to get to the right file check the fdtable for each task, if the + file has registered its fops with vhost_net_open, if so we can + retreive the file's private data. + + if (task->files->fdt->fd[i]->f_op->open == &vhost_scsi_open) + vhost_scsi *vs = task->files->fdt->fd[i]->private_data; + + struct vhost_virtqueue *vq = (struct vhost_virtqueue *)vs->vqs[i].vq; + scrub vq->iov[j].iov_base diff --git a/eppic_scripts/ap_messages_3_10_to_4_8.c b/eppic_scripts/ap_messages_3_10_to_4_8.c new file mode 100644 index 0000000..849e9bc --- /dev/null +++ b/eppic_scripts/ap_messages_3_10_to_4_8.c @@ -0,0 +1,82 @@ +string +ap_device_opt() +{ + return "l"; +} + +string +ap_device_usage() +{ + return "\n"; +} + +static void +ap_device_showusage() +{ + printf("usage : ap_device %s", ap_device_usage()); +} + +string +ap_device_help() +{ + return "Help"; +} + +int +ap_device() +{ + int i; + struct list_head *next; + struct list_head *head; + struct ap_device *off = 0; + + head = (struct list_head *)&ap_device_list; + next = (struct list_head *)head->next; + + if (!next) + return 1; + + while (next != head) + { + struct ap_device *device; + struct list_head *next1, *head1; + + device = (struct ap_device *)((unsigned long)next - ((unsigned long)&(off->list))); + + head1 = (struct list_head *)&(device->pendingq); + next1 = 
(struct list_head *)device->pendingq.next; + + while (next1 != head1) + { + struct ap_message *apmsg; + apmsg = (struct ap_message *)next1; + + if (apmsg->length) { + memset((char *)apmsg->message, 'L', apmsg->length); + memset((char *)&(apmsg->length), 'L', 0x8); + } + + next1 = (struct list_head *)apmsg->list.next; + } + + head1 = (struct list_head *)&(device->requestq); + next1 = (struct list_head *)device->requestq.next; + + while (next1 != head1) + { + struct ap_message *apmsg; + apmsg = (struct ap_message *)next1; + + if (apmsg->length) { + memset((char *)apmsg->message, 'L', apmsg->length); + memset((char *)&(apmsg->length), 'L', 0x8); + } + + next1 = (struct list_head *)apmsg->list.next; + } + + next = (struct list_head *)device->list.next; + } + + return 1; +} diff --git a/eppic_scripts/dir_names_3_10_to_3_13.c b/eppic_scripts/dir_names_3_10_to_3_13.c new file mode 100644 index 0000000..dbe6d00 --- /dev/null +++ b/eppic_scripts/dir_names_3_10_to_3_13.c @@ -0,0 +1,78 @@ +string +vfs_opt() +{ + return "l"; +} + +string +vfs_usage() +{ + return "\n"; +} + +static void +vfs_showusage() +{ + printf("usage : vfs %s", vfs_usage()); +} + +string +vfs_help() +{ + return "Help"; +} + +void +rm_names(struct dentry *dir) +{ + struct list_head *next, *head; + + memset(dir->d_iname, 0, 0x20); + memset(dir->d_name.name, 0, 0x20); + + head = (struct list_head *)&(dir->d_subdirs); + next = (struct list_head *)dir->d_subdirs.next; + + while (next != head) + { + struct dentry *child, *off = 0; + + child = (struct dentry *)((unsigned long)next - (unsigned long)&(off->d_u)); + rm_names(child); + next = child->d_u.d_child.next; + } + + return; +} + +int +vfs() +{ + int i; + struct list_head *tab; + + tab = (struct list_head *)mount_hashtable; + + for (i = 0; i < 256; i++) + { + struct list_head *head, *next; + + head = (struct list_head *) (tab + i); + next = (struct list_head *) head->next; + + if (!next) + continue; + + while (next != head) + { + struct mount *mntfs; + struct 
dentry *root; + + mntfs = (struct mount *)((unsigned long)next); + root = (struct dentry *)mntfs->mnt.mnt_root; + rm_names(root); + next = mntfs->mnt_hash.next; + } + } + return 1; +} diff --git a/eppic_scripts/dir_names_3_14_to_4_8.c b/eppic_scripts/dir_names_3_14_to_4_8.c new file mode 100644 index 0000000..5bf08fe --- /dev/null +++ b/eppic_scripts/dir_names_3_14_to_4_8.c @@ -0,0 +1,82 @@ +string +vfs_opt() +{ + return "l"; +} + +string +vfs_usage() +{ + return "\n"; +} + +static void +vfs_showusage() +{ + printf("usage : vfs %s", vfs_usage()); +} + +string +vfs_help() +{ + return "Help"; +} + +void +rm_names(struct dentry *dir) +{ + struct list_head *next, *head; + unsigned int hash_len; + int i; + + memset(dir->d_iname, 0, 0x20); + hash_len = *((unsigned int *)&dir->d_name); + memset(dir->d_name.name, 0, hash_len); + + head = (struct list_head *)&(dir->d_subdirs); + next = (struct list_head *)dir->d_subdirs.next; + + while (next != head) + { + struct dentry *child, *off = 0; + + child = (struct dentry *)((unsigned long)next - (unsigned long)&(off->d_child)); + rm_names(child); + next = child->d_child.next; + } + + return; +} + +int +vfs() +{ + int i; + struct hlist_bl_head *tab; + unsigned int d_hash_size = d_hash_mask; + + tab = (struct hlist_bl_head *)dentry_hashtable; + + for (i = 0; i < d_hash_size; i++) + { + struct hlist_bl_head *head; + struct hlist_bl_node *head_node, *next; + + head = (struct hlist_bl_head *) (tab + i); + head_node = head->first; + if (!head_node) + continue; + + next = head_node; + + while (next) + { + struct dentry *root, *off = 0; + + root = (struct dentry *)((unsigned long)next - (unsigned long)&(off->d_hash)); + rm_names(root); + next = next->next; + } + } + return 1; +} diff --git a/eppic_scripts/keyring_3_10_to_4_3.c b/eppic_scripts/keyring_3_10_to_4_3.c new file mode 100644 index 0000000..22e7db8 --- /dev/null +++ b/eppic_scripts/keyring_3_10_to_4_3.c @@ -0,0 +1,57 @@ +string +skey_opt() +{ + return "l"; +} + +string 
+skey_usage() +{ + return "\n"; +} + +static void +skey_showusage() +{ + printf("usage : skey %s", skey_usage()); +} + +string +skey_help() +{ + return "Help"; +} + +int +skey() +{ + int i; + struct list_head **tab; + + tab = &keyring_name_hash; + + for (i = 0; i < 32; i++) + { + struct list_head *next, *head; + + head = (struct list_head *) (tab + i); + next = (struct list_head *) head->next; + + if (!next) + continue; + + while (next != head) + { + struct key *mykey, *off = 0; + + mykey = (struct key *)((unsigned long)(next) - ((unsigned long)&(off->type_data))); + + memset((char *)&(mykey->payload.value), 'A', 0x8); + memset((char *)mykey->payload.rcudata, 'A', 0x20); + memset((char *)mykey->payload.data, 'A', 0x20); + + next = (struct list_head *) mykey->type_data.link.next; + } + } + return 1; +} diff --git a/eppic_scripts/keyring_4_4_to_4_8.c b/eppic_scripts/keyring_4_4_to_4_8.c new file mode 100644 index 0000000..dcbcd55 --- /dev/null +++ b/eppic_scripts/keyring_4_4_to_4_8.c @@ -0,0 +1,378 @@ +string +skey_opt() +{ + return "l"; +} + +string +skey_usage() +{ + return "\n"; +} + +static void +skey_showusage() +{ + printf("usage : skey %s", skey_usage()); +} + +string +skey_help() +{ + return "Help"; +} + +#define ASSOC_ARRAY_FAN_OUT 16 +#define ASSOC_ARRAY_FAN_MASK (ASSOC_ARRAY_FAN_OUT - 1) +#define ASSOC_ARRAY_LEVEL_STEP (ilog2(ASSOC_ARRAY_FAN_OUT)) +#define ASSOC_ARRAY_LEVEL_STEP_MASK (ASSOC_ARRAY_LEVEL_STEP - 1) +#define ASSOC_ARRAY_KEY_CHUNK_MASK (ASSOC_ARRAY_KEY_CHUNK_SIZE - 1) +#define ASSOC_ARRAY_KEY_CHUNK_SHIFT (ilog2(BITS_PER_LONG)) +#define ASSOC_ARRAY_PTR_TYPE_MASK 0x1UL +#define ASSOC_ARRAY_PTR_LEAF_TYPE 0x0UL /* Points to leaf (or nowhere) */ +#define ASSOC_ARRAY_PTR_META_TYPE 0x1UL /* Points to node or shortcut */ +#define ASSOC_ARRAY_PTR_SUBTYPE_MASK 0x2UL +#define ASSOC_ARRAY_PTR_NODE_SUBTYPE 0x0UL +#define ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE 0x2UL + +/* Keyring stuff */ +#define KEYRING_PTR_SUBTYPE 0x2UL + +static int 
keyring_ptr_is_keyring(const struct assoc_array_ptr *x) +{ + return (unsigned long)x & KEYRING_PTR_SUBTYPE; +} + +static int assoc_array_ptr_is_meta(const struct assoc_array_ptr *x) +{ + return (unsigned long)x & ASSOC_ARRAY_PTR_TYPE_MASK; +} + +static int assoc_array_ptr_is_leaf(const struct assoc_array_ptr *x) +{ + return !assoc_array_ptr_is_meta(x); +} +static int assoc_array_ptr_is_shortcut(const struct assoc_array_ptr *x) +{ + return (unsigned long)x & ASSOC_ARRAY_PTR_SUBTYPE_MASK; +} +static int assoc_array_ptr_is_node(const struct assoc_array_ptr *x) +{ + return !assoc_array_ptr_is_shortcut(x); +} + +static void *assoc_array_ptr_to_leaf(const struct assoc_array_ptr *x) +{ + return (void *)((unsigned long)x & ~ASSOC_ARRAY_PTR_TYPE_MASK); +} + +static +unsigned long __assoc_array_ptr_to_meta(const struct assoc_array_ptr *x) +{ + return (unsigned long)x & + ~(ASSOC_ARRAY_PTR_SUBTYPE_MASK | ASSOC_ARRAY_PTR_TYPE_MASK); +} +static +struct assoc_array_node *assoc_array_ptr_to_node(const struct assoc_array_ptr *x) +{ + return (struct assoc_array_node *)__assoc_array_ptr_to_meta(x); +} +static +struct assoc_array_shortcut *assoc_array_ptr_to_shortcut(const struct assoc_array_ptr *x) +{ + return (struct assoc_array_shortcut *)__assoc_array_ptr_to_meta(x); +} + +static +struct assoc_array_ptr *__assoc_array_x_to_ptr(const void *p, unsigned long t) +{ + return (struct assoc_array_ptr *)((unsigned long)p | t); +} +static +struct assoc_array_ptr *assoc_array_leaf_to_ptr(const void *p) +{ + return __assoc_array_x_to_ptr(p, ASSOC_ARRAY_PTR_LEAF_TYPE); +} +static +struct assoc_array_ptr *assoc_array_node_to_ptr(const struct assoc_array_node *p) +{ + return __assoc_array_x_to_ptr( + p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_NODE_SUBTYPE); +} + +static +struct assoc_array_ptr *assoc_array_shortcut_to_ptr(const struct assoc_array_shortcut *p) +{ + return __assoc_array_x_to_ptr( + p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE); +} + +/* Keyring stuff */ 
+static inline struct key *keyring_ptr_to_key(const struct assoc_array_ptr *x) +{ + void *object = assoc_array_ptr_to_leaf(x); + return (struct key *)((unsigned long)object & ~KEYRING_PTR_SUBTYPE); +} + +/* BEGIN: struct key access */ +struct keyring_index_key *get_index_key_from_key(struct key *key) +{ + return (struct keyring_index_key *)((unsigned long)&(key->flags) + + sizeof(key->flags)); +} + +struct key_type *get_type_from_key(struct key *key) +{ + return (struct key_type *)((unsigned long)&(key->flags) + + sizeof(key->flags)); +} + +char *get_description_from_key(struct key *key) +{ + return (char *)((unsigned long)&(key->flags) + + sizeof(key->flags) + + sizeof(struct key_type *)); +} + +union key_payload *get_payload_from_key(struct key *key) +{ + return (union key_payload *)((unsigned long)&(key->flags) + + sizeof(key->flags) + + sizeof(struct keyring_index_key)); +} + +struct list_head *get_name_link_from_key(struct key *key) +{ + return (struct list_head *)((unsigned long)&(key->flags) + + sizeof(key->flags) + + sizeof(struct keyring_index_key)); +} + +struct assoc_array *get_keys_from_key(struct key *key) +{ + return (struct assoc_array *)((unsigned long)&(key->flags) + + sizeof(key->flags) + + sizeof(struct keyring_index_key) + + sizeof(struct list_head)); +} +/* END: struct key access */ + +static void delete_keyring_subtree(struct assoc_array_ptr *root) +{ + struct assoc_array_shortcut *shortcut; + struct assoc_array_node *node; + struct assoc_array_ptr *cursor, *parent; + int slot = -1; + + cursor = root; + if (!cursor) { + return; + } + + if (assoc_array_ptr_is_shortcut(cursor)) { + /* Descend through a shortcut */ + shortcut = assoc_array_ptr_to_shortcut(cursor); + parent = cursor; + cursor = shortcut->next_node; + } + + node = assoc_array_ptr_to_node(cursor); + slot = 0; + + if(node->nr_leaves_on_branch <= 0) return; + + do { + for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + struct assoc_array_ptr *ptr = node->slots[slot]; + + if (!ptr) + 
continue; + if (assoc_array_ptr_is_meta(ptr)) { + parent = cursor; + cursor = ptr; + if (assoc_array_ptr_is_shortcut(cursor)) { + /* Descend through a shortcut */ + shortcut = assoc_array_ptr_to_shortcut(cursor); + parent = cursor; + cursor = shortcut->next_node; + } + node = assoc_array_ptr_to_node(cursor); + slot = 0; + } else { + struct key *leaf; + struct keyring_index_key *index_key; + char *description; + void *payload_ptr; + int i,j; + + /* no need to delete keyrings, only data */ + if(keyring_ptr_is_keyring(ptr)) + continue; + + /* delete the leaf payload */ + leaf = (struct key *)assoc_array_ptr_to_leaf(ptr); + index_key = get_index_key_from_key(leaf); + /* + Now delete the keys of the different key types. + The following key types are handled for now: + user, ceph, pkcs7_test, asymmetric(X509), rxpc + + The following key types are NOT handled (yet): + dns_resolver (no secret keys, just used for DNS) + + Add a new else if() for new key types. + */ + if(getstr(index_key->type->name) == "user") { + struct user_key_payload **user_key_payload; + unsigned short datalen; + + payload_ptr=(void *)get_payload_from_key(leaf); + user_key_payload = (struct user_key_payload **)payload_ptr; + datalen = (*user_key_payload)->datalen; + memset((char *)&(*user_key_payload)->data, 'A', datalen); + } else if(getstr(index_key->type->name) == "ceph") { + struct ceph_crypto_key **ceph_payload; + int len; + + payload_ptr=(void *)get_payload_from_key(leaf); + ceph_payload = (struct ceph_crypto_key **)payload_ptr; + len = (*ceph_payload)->len; + memset((char *)&(*ceph_payload)->key, 'A', len); + } else if(getstr(index_key->type->name) == "pkcs7_test") { + struct user_key_payload **user_key_payload; + unsigned short datalen; + + payload_ptr=(void *)get_payload_from_key(leaf); + user_key_payload = (struct user_key_payload **)payload_ptr; + datalen = (*user_key_payload)->datalen; + memset((char *)&(*user_key_payload)->data, 'A', datalen); + } else if(getstr(index_key->type->name) == 
"asymmetric") { + struct public_key **public_key; + unsigned short keylen; + + /* data[0] is asym_crypto */ + payload_ptr=(void *)get_payload_from_key(leaf); + public_key = (struct public_key **)payload_ptr; + keylen = (*public_key)->keylen; + memset((char *)&(*public_key)->key, 'A', keylen); + } else if(getstr(index_key->type->name) == ".request_key_auth") { + struct request_key_auth **request_key; + unsigned short datalen; + + payload_ptr=(void *)get_payload_from_key(leaf); + request_key = (struct request_key_auth **)payload_ptr; + datalen = leaf->datalen; + memset((char *)&(*request_key)->data, 'A', datalen); + } else if(getstr(index_key->type->name) == "rxrpc") { + struct rxrpc_key_token **rxrpc_key_token, *token; + struct rxkad_key *kad; + struct rxk5_key *k5; + int token_count = 0; + + payload_ptr=(void *)get_payload_from_key(leaf); + rxrpc_key_token = (struct rxrpc_key_token **)payload_ptr; + for(; rxrpc_key_token; + rxrpc_key_token = &(*rxrpc_key_token)->next, + token_count++) { + token = *rxrpc_key_token; + switch(token->security_index) { + case 2 : /* RXRPC_SECURITY_RXKAD */ + /* anonymous union, use pointer arithmetic */ + kad = token->next + + sizeof(struct rxrpc_key_token *); + memset(&kad.session_key, 'A', 8); + memset(&kad.ticket, 'A', kad.ticket_len); + break; + case 5 : /* RXRPC_SECURITY_RXK5 */ + /* anonymous union, use pointer arithmetic */ + k5 = token->next + + sizeof(struct rxrpc_key_token *); + memset(k5.ticket, 'A', k5.ticket_len); + memset(k5.ticket2, 'A', k5.ticket2_len); + memset(k5.session.data, 'A', k5.session.data_len); + memset(k5->addresses.data, 'A', k5->addresses.data_len); + memset(k5->authdata.data, 'A', k5->authdata.data_len); + break; + default : + printf("WARNING: unknown security index: %d\n", + token->security_index); + } + /* max number of tokens = 8 */ + if(token_count > 8) { + printf("WARNING: too many rxrpc tokens!\n"); + break; + } + } + } else if(getstr(index_key->type->name) == "dns_resolver") { + /* nothing to do 
here, no secret data */ + } else if(getstr(index_key->type->name) == "big_key") { + printf("WARNING: key_type=big_key not handled!\n"); + } else { + printf("WARNING: unsupported key type = %s!\n", + getstr(index_key->type->name)); + } + } + } + + parent = node->back_pointer; + slot = node->parent_slot; + if (parent) { + /* Move back up to the parent */ + if (assoc_array_ptr_is_shortcut(parent)) { + shortcut = assoc_array_ptr_to_shortcut(parent); + cursor = parent; + parent = shortcut->back_pointer; + slot = shortcut->parent_slot; + } + + /* Ascend to next slot in parent node */ + cursor = parent; + node = assoc_array_ptr_to_node(cursor); + slot++; + } + } while(parent); + + return; +} + +void delete_keyring(struct assoc_array *keyring) +{ + delete_keyring_subtree(keyring->root); +} + +int +skey() +{ + int i,j,k; + struct list_head **tab; + + tab = &keyring_name_hash; + + for (i = 0; i < 32; i++) + { + struct list_head *next, *head; + + head = (struct list_head *) (tab + i); + next = (struct list_head *) head->next; + + if (!next) + continue; + + while (next != head) + { + struct key *mykey, *off = 0; + struct list_head *name_link; + struct assoc_array *keys; + + mykey = (struct key *)((unsigned long)(next) + - (unsigned long)&(off->flags) + - sizeof(off->flags) + - sizeof(struct keyring_index_key)); + name_link = get_name_link_from_key(mykey); + keys = get_keys_from_key(mykey); + delete_keyring(keys); + next = (struct list_head *) name_link->next; + } + } + return 1; +} diff --git a/eppic_scripts/proc_names_3_10_to_4_8.c b/eppic_scripts/proc_names_3_10_to_4_8.c new file mode 100644 index 0000000..12876df --- /dev/null +++ b/eppic_scripts/proc_names_3_10_to_4_8.c @@ -0,0 +1,49 @@ +string +proc_opt() +{ + return "l"; +} + +string +proc_usage() +{ + return "\n"; +} + +static void +proc_showusage() +{ + printf("usage : proc %s", proc_usage()); +} + +string +proc_help() +{ + return "Help"; +} + +int +proc() +{ + struct list_head *head, *next; + struct task_struct *tsk; 
+ + tsk = &init_task; + + head = (struct list_head *) &(tsk->tasks); + next = (struct list_head *) tsk->tasks.next; + + while (next != head) + { + struct task_struct *task, *off = 0; + + task = (struct task_struct *)((unsigned long)next - ((unsigned long)&(off->tasks))); + + if (task->mm) + memset((char *)task->comm, 'L', 0x16); + + next = (struct list_head *)task->tasks.next; + } + + return 1; +} diff --git a/eppic_scripts/tcp_sk_buf_3_10_to_4_8.c b/eppic_scripts/tcp_sk_buf_3_10_to_4_8.c new file mode 100644 index 0000000..922f507 --- /dev/null +++ b/eppic_scripts/tcp_sk_buf_3_10_to_4_8.c @@ -0,0 +1,82 @@ +string +tcp_opt() +{ + return "l"; +} + +string +tcp_usage() +{ + return "\n"; +} + +static void +tcp_showusage() +{ + printf("usage : tcp %s", tcp_non_legacy_usage()); +} + +string +tcp_help() +{ + return "Help"; +} + +int +tcp() +{ + int i; + struct inet_hashinfo *tab; + struct sock_common *off = 0; + + tab = &tcp_hashinfo; + + for (i = 0; i < 32; i++) { + struct hlist_nulls_node *pos; + + pos = tab->listening_hash[i].head.first; + + while (!((unsigned long)pos & 1)) { + struct sock *sk; + struct sk_buff *next; + struct sk_buff_head *head; + struct hlist_nulls_node *node; + + sk = (struct sock *)((unsigned long)pos - (unsigned long)&(off->skc_dontcopy_begin)); + + head = (struct sk_buff_head *)&(sk->sk_receive_queue); + next = (struct sk_buff *)sk->sk_receive_queue.next; + + while (next != head) + { + struct sk_buff *buff = (struct sk_buff *) next; + + if (buff->data_len) { + memset((char *)buff->data, 'L', buff->data_len); + memset((char *)&(buff->data_len), 'L', 0x4); + } + + next = buff->next; + } + + head = (struct sk_buff_head *)&(sk->sk_write_queue); + next = (struct sk_buff *)sk->sk_write_queue.next; + + while (next != head) + { + struct sk_buff *buff = (struct sk_buff *) next; + + if (buff->data_len) { + memset((char *)buff->data, 'L', buff->data_len); + memset((char *)&(buff->data_len), 'L', 0x4); + } + + next = buff->next; + } + + node = (struct 
hlist_nulls_node *)((unsigned long)sk + (unsigned long)&(off->skc_dontcopy_begin)); + pos = node->next; + } + } + return 1; +} diff --git a/eppic_scripts/udp_sk_buf_3_10_to_4_8.c b/eppic_scripts/udp_sk_buf_3_10_to_4_8.c new file mode 100644 index 0000000..ca8c77b --- /dev/null +++ b/eppic_scripts/udp_sk_buf_3_10_to_4_8.c @@ -0,0 +1,83 @@ +string +udp_opt() +{ + return "l"; +} + +string +udp_usage() +{ + return "\n"; +} + +static void +udp_showusage() +{ + printf("usage : udp %s", udp_usage()); +} + +string +udp_help() +{ + return "Help"; +} + +int +udp() +{ + int i; + int size; + struct udp_table *table; + struct sock_common *off = 0; + + table = (struct udp_table *)&udp_table; + + for (i = 0; i < table->mask; i++) { + struct hlist_nulls_node *pos; + + pos = table->hash[i].head.first; + + while (!((unsigned long)pos & 1)) { + struct sock *sk; + struct sk_buff *next; + struct sk_buff_head *head; + struct hlist_nulls_node *node; + + sk = (struct sock *)((unsigned long)pos - ((unsigned long)&(off->skc_dontcopy_begin))); + + head = (struct sk_buff_head *)&(sk->sk_receive_queue); + next = (struct sk_buff *)sk->sk_receive_queue.next; + + while (next != head) + { + struct sk_buff *buff = (struct sk_buff *)next; + + if (buff->data_len) { + memset((char *)buff->data, 'L', buff->data_len); + memset((char *)&(buff->data_len), 'L', 0x4); + } + + next = buff->next; + } + + head = (struct sk_buff_head *)&(sk->sk_write_queue); + next = (struct sk_buff *)sk->sk_write_queue.next; + + while (next != head) + { + struct sk_buff *buff = (struct sk_buff *)next; + + if (buff->data_len) { + memset((char *)buff->data, 'L', buff->data_len); + memset((char *)&(buff->data_len), 'L', 0x4); + } + + next = buff->next; + } + + node = (struct hlist_nulls_node *)((unsigned long)sk + (unsigned long)&(off->skc_dontcopy_begin)); + pos = node->next; + } + } + return 1; +} diff --git a/eppic_scripts/unix_sk_buff_3_10_to_4_8.c b/eppic_scripts/unix_sk_buff_3_10_to_4_8.c new file mode 100644 index 
0000000..12e1e6c --- /dev/null +++ b/eppic_scripts/unix_sk_buff_3_10_to_4_8.c @@ -0,0 +1,85 @@ +string +sunix_opt() +{ + return "l"; +} + +string +sunix_usage() +{ + return "\n"; +} + +static void +sunix_showusage() +{ + printf("usage : sunix %s", sunix_usage()); +} + +string +sunix_help() +{ + return "Help"; +} + +int +sunix() +{ + int i; + int size; + struct hlist_head **tab; + struct sock_common *off = 0; + + tab = &unix_socket_table; + + for (i = 0; i < 256; i++) { + struct hlist_node *pos; + struct hlist_node *node; + struct hlist_head *tmp; + + tmp = (struct hlist_head *)(tab + i); + pos = tmp->first; + + while (pos) { + struct sock *sk; + struct sk_buff *next; + struct sk_buff_head *head; + + sk = (struct sock *)((unsigned long)pos - (unsigned long)&(off->skc_dontcopy_begin)); + + head = (struct sk_buff_head *)&(sk->sk_receive_queue); + next = (struct sk_buff *)sk->sk_receive_queue.next; + + while (next != head) + { + struct sk_buff *buff = (struct sk_buff *)next; + + if (buff->data_len) { + memset((char *)buff->data, 'L', buff->data_len); + memset((char *)&(buff->data_len), 'L', 0x4); + } + + next = buff->next; + } + + head = (struct sk_buff_head *)&(sk->sk_write_queue); + next = (struct sk_buff *)sk->sk_write_queue.next; + + while (next != head) + { + struct sk_buff *buff = (struct sk_buff *)next; + + if (buff->data_len) { + memset((char *)buff->data, 'L', buff->data_len); + memset((char *)&(buff->data_len), 'L', 0x4); + } + + next = buff->next; + } + + node = (struct hlist_node *)((unsigned long)sk + (unsigned long)&(off->skc_dontcopy_begin)); + pos = node->next; + } + } + return 1; +} diff --git a/eppic_scripts/vhost_net_buffers_3_10_to_3_18.c b/eppic_scripts/vhost_net_buffers_3_10_to_3_18.c new file mode 100644 index 0000000..39ae595 --- /dev/null +++ b/eppic_scripts/vhost_net_buffers_3_10_to_3_18.c @@ -0,0 +1,99 @@ +string +vhost_opt() +{ + return "l"; +} + +string +vhost_usage() +{ + return "\n"; +} + +static void +vhost_showusage() +{ + printf("usage 
: net_ %s", vhost_usage()); +} + +string +vhost_help() +{ + return "Help"; +} + +void +vhost_net(struct vhost_net *net) +{ + int i; + + for (i = 0; i < 2; i++) { + struct vhost_net_virtqueue *nvq = &net->vqs[i]; + struct vhost_virtqueue *vq = &nvq->vq; + struct socket *sock = (struct socket *)vq->private_data; + struct sock *sk = sock->sk; + + struct sk_buff_head *head = &(sk->sk_receive_queue); + struct sk_buff *next = sk->sk_receive_queue.next; + + while (next != head) + { + struct sk_buff *buff = (struct sk_buff *) next; + + if (buff->data_len) { + memset((char *)buff->data, 'L', buff->data_len); + memset((char *)&(buff->data_len), 'L', 0x4); + } + + next = buff->next; + } + + head = (struct sk_buff_head *)&(sk->sk_write_queue); + next = (struct sk_buff *)sk->sk_write_queue.next; + + while (next != head) + { + struct sk_buff *buff = (struct sk_buff *) next; + + if (buff->data_len) { + memset((char *)buff->data, 'L', buff->data_len); + memset((char *)&(buff->data_len), 'L', 0x4); + } + + next = buff->next; + + } + } +} + +int +vhost() +{ + struct list_head *head, *next; + struct task_struct *tsk; + + tsk = &init_task; + + head = (struct list_head *) &(tsk->tasks); + next = (struct list_head *) tsk->tasks.next; + + while (next != head) + { + int i; + struct task_struct *task, *off = 0; + + task = (struct task_struct *)((unsigned long)next - ((unsigned long)&(off->tasks))); + + if (task->files && task->files->fdt) { + for (i = 0; i < task->files->fdt->max_fds; i++) { + if (task->files->fdt->fd[i] && task->files->fdt->fd[i]->f_op + && task->files->fdt->fd[i]->f_op->open == &vhost_net_open) + vhost_net((struct vhost_net *)task->files->fdt->fd[i]->private_data); + } + } + + next = (struct list_head *)task->tasks.next; + } + + return 1; +} diff --git a/eppic_scripts/vhost_net_buffers_3_19_to_4_8.c b/eppic_scripts/vhost_net_buffers_3_19_to_4_8.c new file mode 100644 index 0000000..1260acb --- /dev/null +++ b/eppic_scripts/vhost_net_buffers_3_19_to_4_8.c @@ -0,0 +1,104 
@@ +string +vhost_opt() +{ + return "l"; +} + +string +vhost_usage() +{ + return "\n"; +} + +static void +vhost_showusage() +{ + printf("usage : net_ %s", vhost_usage()); +} + +string +vhost_help() +{ + return "Help"; +} + +void +vhost_net(struct vhost_net *net) +{ + int i; + + for (i = 0; i < 2; i++) { + struct vhost_net_virtqueue *nvq = &net->vqs[i]; + struct vhost_virtqueue *vq = &nvq->vq; + struct socket *sock = (struct socket *)vq->private_data; + struct sock *sk = sock->sk; + + struct sk_buff_head *head = &(sk->sk_receive_queue); + struct sk_buff *next = sk->sk_receive_queue.next; + + while (next != head) + { + struct sk_buff *buff = (struct sk_buff *) next; + + if (buff->data_len) { + memset((char *)buff->data, 'L', buff->data_len); + memset((char *)&(buff->data_len), 'L', 0x4); + } + + /* + * .next is the first entry. + */ + next = (struct sk_buff *)(unsigned long)*buff; + } + + head = (struct sk_buff_head *)&(sk->sk_write_queue); + next = (struct sk_buff *)sk->sk_write_queue.next; + + while (next != head) + { + struct sk_buff *buff = (struct sk_buff *) next; + + if (buff->data_len) { + memset((char *)buff->data, 'L', buff->data_len); + memset((char *)&(buff->data_len), 'L', 0x4); + } + + /* + * .next is the first entry. 
+ */ + next = (struct sk_buff *)(unsigned long)*buff; + } + } +} + +int +vhost() +{ + struct list_head *head, *next; + struct task_struct *tsk; + + tsk = &init_task; + + head = (struct list_head *) &(tsk->tasks); + next = (struct list_head *) tsk->tasks.next; + + while (next != head) + { + int i; + struct task_struct *task, *off = 0; + + task = (struct task_struct *)((unsigned long)next - ((unsigned long)&(off->tasks))); + + if (task->files && task->files->fdt) { + for (i = 0; i < task->files->fdt->max_fds; i++) { + if (task->files->fdt->fd[i] && task->files->fdt->fd[i]->f_op + && task->files->fdt->fd[i]->f_op->open == &vhost_net_open) + vhost_net((struct vhost_net *)task->files->fdt->fd[i]->private_data); + } + } + + next = (struct list_head *)task->tasks.next; + } + + return 1; +} diff --git a/eppic_scripts/vhost_scsi_buffers_3_10_to_4_8.c b/eppic_scripts/vhost_scsi_buffers_3_10_to_4_8.c new file mode 100644 index 0000000..840cdd5 --- /dev/null +++ b/eppic_scripts/vhost_scsi_buffers_3_10_to_4_8.c @@ -0,0 +1,75 @@ +string +vhost_opt() +{ + return "l"; +} + +string +vhost_usage() +{ + return "\n"; +} + +static void +vhost_showusage() +{ + printf("usage : vhost %s", vhost_usage()); +} + +string +vhost_help() +{ + return "Help"; +} + +void +vhost_scsi(struct vhost_scsi *vs) +{ + if (vs == NULL) + return; + + for (i = 0; i < 128; i++) { + struct vhost_virtqueue *vq = (struct vhost_virtqueue *)vs->vqs[i].vq; + + for (j = 0; j < 1024; j++) { + + if (vq->iov[j].iov_len) { + memset((char *)vq->iov[j].iov_base, 'L', vq->iov[j].iov_len); + memset((char *)&(vq->iov[j].iov_len), 'L', 0x8); + } + } + } +} + +int +vhost() +{ + struct list_head *head, *next; + struct task_struct *tsk; + + tsk = &init_task; + + head = (struct list_head *) &(tsk->tasks); + next = (struct list_head *) tsk->tasks.next; + + while (next != head) + { + int i; + struct task_struct *task, *off = 0; + + task = (struct task_struct *)((unsigned long)next - ((unsigned long)&(off->tasks))); + + if 
(task->files && task->files->fdt) { + for (i = 0; i < task->files->fdt->max_fds; i++) { + if (task->files->fdt->fd[i] && task->files->fdt->fd[i]->f_op + && task->files->fdt->fd[i]->f_op->open == &vhost_scsi_open) + vhost_scsi((struct vhost_scsi *)task->files->fdt->fd[i]->private_data); + } + } + + + next = (struct list_head *)task->tasks.next; + } + + return 1; +} diff --git a/erase_info.c b/erase_info.c new file mode 100644 index 0000000..60abfa1 --- /dev/null +++ b/erase_info.c @@ -0,0 +1,2470 @@ +/* + * erase_info.c + * + * Created by: Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> + * + * Copyright (C) 2011 IBM Corporation + * Copyright (C) 2011 NEC Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include "makedumpfile.h" +#include "print_info.h" +#include "dwarf_info.h" +#include "erase_info.h" + +#include <dlfcn.h> + +struct erase_info *erase_info = NULL; +unsigned long num_erase_info = 1; /* Node 0 is unused. 
*/ + +struct call_back eppic_cb = { + &get_domain_all, + &readmem, + &get_die_attr_type, + &get_die_name, + &get_die_offset, + &get_die_length, + &get_die_member_all, + &get_die_nfields_all, + &get_symbol_addr_all, + &update_filter_info_raw +}; + + +/* + * flags for config_entry.flag + */ +#define FILTER_ENTRY 0x0001 +#define SIZE_ENTRY 0x0002 +#define ITERATION_ENTRY 0x0004 +#define LIST_ENTRY 0x0008 +#define SYMBOL_ENTRY 0x0010 +#define VAR_ENTRY 0x0020 +#define TRAVERSAL_ENTRY 0x0040 +#define ENTRY_RESOLVED 0x8000 + +/* + * flags for get_config() + */ +#define CONFIG_SKIP_SECTION 0x01 +#define CONFIG_NEW_CMD 0x02 + +#define IS_KEYWORD(tkn) \ + (!strcmp(tkn, "erase") || !strcmp(tkn, "size") || \ + !strcmp(tkn, "nullify") || !strcmp(tkn, "for") || \ + !strcmp(tkn, "in") || !strcmp(tkn, "within") || \ + !strcmp(tkn, "endfor")) + +struct module_sym_table { + unsigned int num_modules; + unsigned int current_mod; + struct module_info *modules; +}; + +/* + * Filtering physical address range. 
+ */ +struct filter_info { + unsigned long long vaddr; /* symbol address for debugging */ + unsigned long long paddr; + long size; + + /* direct access to update erase information node */ + int erase_info_idx; /* 0= invalid index */ + int size_idx; + + int erase_ch; + + struct filter_info *next; + unsigned short nullify; +}; + +/* + * Filter config information + */ +struct filter_config { + char *name_filterconfig; + FILE *file_filterconfig; + char *cur_module; + char *saved_token; + char *token; + int new_section; + int line_count; +}; + +struct config_entry { + char *name; + char *type_name; + char *symbol_expr; /* original symbol expression */ + unsigned short flag; + unsigned short nullify; + unsigned long long sym_addr; /* Symbol address */ + unsigned long vaddr; /* Symbol address or + value pointed by sym_addr */ + unsigned long long cmp_addr; /* for LIST_ENTRY */ + unsigned long offset; + unsigned long type_flag; + long array_length; + long index; + long size; + int line; /* Line number in config file. */ + int erase_info_idx; /* 0= invalid index */ + struct config_entry *refer_to; + struct config_entry *next; +}; + +struct config { + char *module_name; + struct config_entry *iter_entry; + struct config_entry *list_entry; + int num_filter_symbols; + struct config_entry **filter_symbol; + struct config_entry **size_symbol; +}; + +static struct module_sym_table mod_st = { 0 }; +static struct filter_info *filter_info = NULL; +static struct filter_config filter_config; +static char config_buf[BUFSIZE_FGETS]; + + +/* + * Internal functions. 
+ */ +static struct module_info * +get_loaded_module(char *mod_name) +{ + unsigned int i; + struct module_info *modules; + + modules = mod_st.modules; + if (strcmp(mod_name, modules[mod_st.current_mod].name)) { + for (i = 0; i < mod_st.num_modules; i++) { + if (!strcmp(mod_name, modules[i].name)) + break; + } + if (i == mod_st.num_modules) + return NULL; + /* set the current_mod for fast lookup next time */ + mod_st.current_mod = i; + } + + return &modules[mod_st.current_mod]; +} + +static unsigned long long +find_module_symbol(struct module_info *module_ptr, char *symname) +{ + int i; + struct symbol_info *sym_info; + + sym_info = module_ptr->sym_info; + if (!sym_info) + return FALSE; + for (i = 1; i < module_ptr->num_syms; i++) { + if (sym_info[i].name && !strcmp(sym_info[i].name, symname)) + return sym_info[i].value; + } + return NOT_FOUND_SYMBOL; +} + +static int +sym_in_module(char *symname, unsigned long long *symbol_addr) +{ + char *module_name; + struct module_info *module_ptr; + + module_name = get_dwarf_module_name(); + if (!mod_st.num_modules + || !strcmp(module_name, "vmlinux") + || !strcmp(module_name, "xen-syms")) + return FALSE; + + module_ptr = get_loaded_module(module_name); + if (!module_ptr) + return FALSE; + *symbol_addr = find_module_symbol(module_ptr, symname); + if (*symbol_addr == NOT_FOUND_SYMBOL) + return FALSE; + else + return TRUE; +} + +static unsigned int +get_num_modules(unsigned long head, unsigned int *num) +{ + unsigned long cur; + unsigned int num_modules = 0; + + if (!num) + return FALSE; + + if (!readmem(VADDR, head + OFFSET(list_head.next), &cur, sizeof cur)) { + ERRMSG("Can't get next list_head.\n"); + return FALSE; + } + while (cur != head) { + num_modules++; + if (!readmem(VADDR, cur + OFFSET(list_head.next), + &cur, sizeof cur)) { + ERRMSG("Can't get next list_head.\n"); + return FALSE; + } + } + *num = num_modules; + return TRUE; +} + +static void +free_symbol_info(struct module_info *module) +{ + int i; + + if 
(module->num_syms == 0) + return; + + for (i = 1; i < module->num_syms; i++) + if (module->sym_info[i].name) + free(module->sym_info[i].name); + free(module->sym_info); +} + +static void +clean_module_symbols(void) +{ + int i; + + for (i = 0; i < mod_st.num_modules; i++) + free_symbol_info(&mod_st.modules[i]); + + if (mod_st.num_modules) { + free(mod_st.modules); + mod_st.modules = NULL; + mod_st.num_modules = 0; + } +} + +static int +__load_module_symbol(struct module_info *modules, unsigned long addr_module) +{ + int ret = FALSE; + unsigned int nsym; + unsigned long symtab, strtab; + unsigned long mod_base, mod_init; + unsigned int mod_size, mod_init_size; + unsigned char *module_struct_mem = NULL; + unsigned char *module_core_mem = NULL; + unsigned char *module_init_mem = NULL; + unsigned char *symtab_mem; + char *module_name, *strtab_mem, *nameptr; + unsigned int num_symtab; + + /* Allocate buffer to read struct module data from vmcore. */ + if ((module_struct_mem = calloc(1, SIZE(module))) == NULL) { + ERRMSG("Failed to allocate buffer for module\n"); + return FALSE; + } + if (!readmem(VADDR, addr_module, module_struct_mem, + SIZE(module))) { + ERRMSG("Can't get module info.\n"); + goto out; + } + + module_name = (char *)(module_struct_mem + OFFSET(module.name)); + if (strlen(module_name) < MOD_NAME_LEN) + strcpy(modules->name, module_name); + else + strncpy(modules->name, module_name, MOD_NAME_LEN-1); + + mod_init = ULONG(module_struct_mem + + OFFSET(module.module_init)); + mod_init_size = UINT(module_struct_mem + + OFFSET(module.init_size)); + mod_base = ULONG(module_struct_mem + + OFFSET(module.module_core)); + mod_size = UINT(module_struct_mem + + OFFSET(module.core_size)); + + DEBUG_MSG("Module: %s, Base: 0x%lx, Size: %u\n", + module_name, mod_base, mod_size); + if (mod_init_size > 0) { + module_init_mem = calloc(1, mod_init_size); + if (module_init_mem == NULL) { + ERRMSG("Can't allocate memory for module " + "init\n"); + goto out; + } + if 
(!readmem(VADDR, mod_init, module_init_mem, + mod_init_size)) { + ERRMSG("Can't access module init in memory.\n"); + goto out; + } + } + + if ((module_core_mem = calloc(1, mod_size)) == NULL) { + ERRMSG("Can't allocate memory for module\n"); + goto out; + } + if (!readmem(VADDR, mod_base, module_core_mem, mod_size)) { + ERRMSG("Can't access module in memory.\n"); + goto out; + } + + num_symtab = UINT(module_struct_mem + + OFFSET(module.num_symtab)); + if (!num_symtab) { + ERRMSG("%s: Symbol info not available\n", module_name); + goto out; + } + modules->num_syms = num_symtab; + DEBUG_MSG("num_sym: %d\n", num_symtab); + + symtab = ULONG(module_struct_mem + OFFSET(module.symtab)); + strtab = ULONG(module_struct_mem + OFFSET(module.strtab)); + + /* check if symtab and strtab are inside the module space. */ + if (!IN_RANGE(symtab, mod_base, mod_size) && + !IN_RANGE(symtab, mod_init, mod_init_size)) { + ERRMSG("%s: module symtab is outside of module " + "address space\n", module_name); + goto out; + } + if (IN_RANGE(symtab, mod_base, mod_size)) + symtab_mem = module_core_mem + (symtab - mod_base); + else + symtab_mem = module_init_mem + (symtab - mod_init); + + if (!IN_RANGE(strtab, mod_base, mod_size) && + !IN_RANGE(strtab, mod_init, mod_init_size)) { + ERRMSG("%s: module strtab is outside of module " + "address space\n", module_name); + goto out; + } + if (IN_RANGE(strtab, mod_base, mod_size)) + strtab_mem = (char *)(module_core_mem + + (strtab - mod_base)); + else + strtab_mem = (char *)(module_init_mem + + (strtab - mod_init)); + + modules->sym_info = calloc(num_symtab, sizeof(struct symbol_info)); + if (modules->sym_info == NULL) { + ERRMSG("Can't allocate memory to store sym info\n"); + goto out; + } + + /* symbols starts from 1 */ + for (nsym = 1; nsym < num_symtab; nsym++) { + Elf32_Sym *sym32; + Elf64_Sym *sym64; + /* + * TODO: + * If case of ELF vmcore then the word size can be + * determined using flag_elf64_memory flag. 
+ * But in case of kdump-compressed dump, kdump header + * does not carry word size info. May be in future + * this info will be available in kdump header. + * Until then, in order to make this logic work on both + * situation we depend on pointer_size that is + * extracted from vmlinux dwarf information. + */ + if ((get_pointer_size() * 8) == 64) { + sym64 = (Elf64_Sym *) (symtab_mem + + (nsym * sizeof(Elf64_Sym))); + modules->sym_info[nsym].value = + (unsigned long long) sym64->st_value; + nameptr = strtab_mem + sym64->st_name; + } else { + sym32 = (Elf32_Sym *) (symtab_mem + + (nsym * sizeof(Elf32_Sym))); + modules->sym_info[nsym].value = + (unsigned long long) sym32->st_value; + nameptr = strtab_mem + sym32->st_name; + } + if (strlen(nameptr)) + modules->sym_info[nsym].name = strdup(nameptr); + DEBUG_MSG("\t[%d] %llx %s\n", nsym, + modules->sym_info[nsym].value, nameptr); + } + ret = TRUE; +out: + free(module_struct_mem); + free(module_core_mem); + free(module_init_mem); + + return ret; +} + +static int +load_module_symbols(void) +{ + unsigned long head, cur, cur_module; + struct module_info *modules = NULL; + unsigned int i = 0; + + head = SYMBOL(modules); + if (!get_num_modules(head, &mod_st.num_modules) || + !mod_st.num_modules) { + ERRMSG("Can't get module count\n"); + return FALSE; + } + mod_st.modules = calloc(mod_st.num_modules, + sizeof(struct module_info)); + if (!mod_st.modules) { + ERRMSG("Can't allocate memory for module info\n"); + return FALSE; + } + modules = mod_st.modules; + + if (!readmem(VADDR, head + OFFSET(list_head.next), &cur, sizeof cur)) { + ERRMSG("Can't get next list_head.\n"); + return FALSE; + } + + /* Travese the list and read module symbols */ + while (cur != head) { + cur_module = cur - OFFSET(module.list); + + if (!__load_module_symbol(&modules[i], cur_module)) + return FALSE; + + if (!readmem(VADDR, cur + OFFSET(list_head.next), + &cur, sizeof cur)) { + ERRMSG("Can't get next list_head.\n"); + return FALSE; + } + i++; + } + 
return TRUE; +} + +static void +free_config_entry(struct config_entry *ce) +{ + struct config_entry *p; + + while(ce) { + p = ce; + ce = p->next; + if (p->name) + free(p->name); + if (p->type_name) + free(p->type_name); + if (p->symbol_expr) + free(p->symbol_expr); + free(p); + } +} + +static void +free_config(struct config *config) +{ + int i; + + if (config == NULL) + return; + + if (config->module_name) + free(config->module_name); + for (i = 0; i < config->num_filter_symbols; i++) { + if (config->filter_symbol[i]) + free_config_entry(config->filter_symbol[i]); + if (config->size_symbol[i]) + free_config_entry(config->size_symbol[i]); + } + if (config->filter_symbol) + free(config->filter_symbol); + if (config->size_symbol) + free(config->size_symbol); + free(config); +} + +static void +print_config_entry(struct config_entry *ce) +{ + while (ce) { + DEBUG_MSG("Name: %s\n", ce->name); + DEBUG_MSG("Type Name: %s, ", ce->type_name); + DEBUG_MSG("flag: %x, ", ce->flag); + DEBUG_MSG("Type flag: %lx, ", ce->type_flag); + DEBUG_MSG("sym_addr: %llx, ", ce->sym_addr); + DEBUG_MSG("vaddr: %lx, ", ce->vaddr); + DEBUG_MSG("offset: %llx, ", (unsigned long long)ce->offset); + DEBUG_MSG("size: %ld\n", ce->size); + + ce = ce->next; + } +} + +/* + * Read the non-terminal's which are in the form of <Symbol>[.member[...]] + */ +static struct config_entry * +create_config_entry(const char *token, unsigned short flag, int line) +{ + struct config_entry *ce = NULL, *ptr, *prev_ce; + char *str, *cur, *next; + long len; + int depth = 0; + + if (!token) + return NULL; + + cur = str = strdup(token); + prev_ce = ptr = NULL; + while (cur != NULL) { + if ((next = strchr(cur, '.')) != NULL) { + *next++ = '\0'; + } + if (!strlen(cur)) { + cur = next; + continue; + } + + if ((ptr = calloc(1, sizeof(struct config_entry))) == NULL) { + ERRMSG("Can't allocate memory for config_entry\n"); + goto err_out; + } + ptr->line = line; + ptr->flag |= flag; + if (depth == 0) { + /* First node is always a 
symbol name */ + ptr->flag |= SYMBOL_ENTRY; + } + if (flag & ITERATION_ENTRY) { + /* Max depth for iteration entry is 1 */ + if (depth > 0) { + ERRMSG("Config error at %d: Invalid iteration " + "variable entry.\n", line); + goto err_out; + } + ptr->name = strdup(cur); + } + if (flag & (FILTER_ENTRY | LIST_ENTRY)) { + ptr->name = strdup(cur); + } + if (flag & SIZE_ENTRY) { + char ch = '\0'; + int n = 0; + /* See if absolute length is provided */ + if ((depth == 0) && + ((n = sscanf(cur, "%ld%c", &len, &ch)) > 0)) { + if (len < 0) { + ERRMSG("Config error at %d: size " + "value must be positive.\n", + line); + goto err_out; + } + ptr->size = len; + ptr->flag |= ENTRY_RESOLVED; + if (n == 2) { + /* Handle suffix. + * K = Kilobytes + * M = Megabytes + */ + switch (ch) { + case 'M': + case 'm': + ptr->size *= 1024; + case 'K': + case 'k': + ptr->size *= 1024; + break; + } + } + } + else + ptr->name = strdup(cur); + } + if (prev_ce) { + prev_ce->next = ptr; + prev_ce = ptr; + } else + ce = prev_ce = ptr; + + cur = next; + ptr = NULL; + depth++; + } + free(str); + return ce; + +err_out: + if (ce) + free_config_entry(ce); + if (ptr) + free_config_entry(ptr); + free(str); + return NULL; +} + +static int +is_module_loaded(char *mod_name) +{ + if (!strcmp(mod_name, "vmlinux") || get_loaded_module(mod_name)) + return TRUE; + return FALSE; +} + +/* + * read filter config file and return each string token. If the parameter + * expected_token is non-NULL, then return the current token if it matches + * with expected_token otherwise save the current token and return NULL. + * At start of every module section filter_config.new_section is set to 1 and + * subsequent function invocations return NULL untill filter_config.new_section + * is reset to 0 by passing @flag = CONFIG_NEW_CMD (0x02). + * + * Parameters: + * @expected_token INPUT + * Token string to match with currnet token. + * =NULL - return the current available token. 
+ * + * @flag INPUT + * =0x01 - Skip to next module section. + * =0x02 - Treat the next token as next filter command and reset. + * + * @line OUTPUT + * Line number of current token in filter config file. + * + * @cur_mod OUTPUT + * Points to current module section name on non-NULL return value. + * + * @eof OUTPUT + * set to -1 when end of file is reached. + * set to -2 when end of section is reached. + */ +#define NOT_REACH_END (0) +#define REACH_END_OF_FILE (-1) +#define REACH_END_OF_SECTION (-2) + +static char * +get_config_token(char *expected_token, unsigned char flag, int *line, + char **cur_mod, int *eof) +{ + char *p; + struct filter_config *fc = &filter_config; + int skip = flag & CONFIG_SKIP_SECTION; + + if (!fc->file_filterconfig) + return NULL; + + if (eof) + *eof = NOT_REACH_END; + + /* + * set token and saved_token to NULL if skip module section is set + * to 1. + */ + if (skip) { + fc->token = NULL; + fc->saved_token = NULL; + + } else if (fc->saved_token) { + fc->token = fc->saved_token; + fc->saved_token = NULL; + + } else if (fc->token) + fc->token = strtok(NULL, " "); + + /* Read next line if we are done all tokens from previous line */ + while (!fc->token && fgets(config_buf, sizeof(config_buf), + fc->file_filterconfig)) { + if ((p = strchr(config_buf, '\n'))) { + *p = '\0'; + fc->line_count++; + } + if ((p = strchr(config_buf, '#'))) { + *p = '\0'; + } + /* replace all tabs with spaces */ + for (p = config_buf; *p != '\0'; p++) + if (*p == '\t') + *p = ' '; + if (config_buf[0] == '[') { + /* module section entry */ + p = strchr(config_buf, ']'); + if (!p) { + ERRMSG("Config error at %d: Invalid module " + "section entry.\n", fc->line_count); + /* skip to next valid module section */ + skip = 1; + } else { + /* + * Found the valid module section. Reset the + * skip flag. 
+ */ + *p = '\0'; + if (fc->cur_module) + free(fc->cur_module); + fc->cur_module = strdup(&config_buf[1]); + fc->new_section = 1; + skip = 0; + } + continue; + } + /* + * If symbol info for current module is not loaded then + * skip to next module section. + */ + if (skip || + (fc->cur_module && !is_module_loaded(fc->cur_module))) + continue; + + fc->token = strtok(config_buf, " "); + } + if (!fc->token) { + if (eof) + *eof = REACH_END_OF_FILE; + return NULL; + } + if (fc->new_section && !(flag & CONFIG_NEW_CMD)) { + fc->saved_token = fc->token; + if (eof) + *eof = REACH_END_OF_SECTION; + return NULL; + } + + fc->new_section = 0; + + if (cur_mod) + *cur_mod = fc->cur_module; + + if (line) + *line = fc->line_count; + + if (expected_token && strcmp(fc->token, expected_token)) { + fc->saved_token = fc->token; + return NULL; + } + return fc->token; +} + +static int +read_size_entry(struct config *config, int line, int idx) +{ + char *token = get_config_token(NULL, 0, &line, NULL, NULL); + + if (!token || IS_KEYWORD(token)) { + ERRMSG("Config error at %d: expected size symbol after" + " 'size' keyword.\n", line); + return FALSE; + } + config->size_symbol[idx] = create_config_entry(token, SIZE_ENTRY, line); + if (!config->size_symbol[idx]) { + ERRMSG("Error at line %d: Failed to read size symbol\n", + line); + return FALSE; + } + if (config->iter_entry && config->size_symbol[idx]->name && + (!strcmp(config->size_symbol[idx]->name, + config->iter_entry->name))) { + config->size_symbol[idx]->flag &= ~SYMBOL_ENTRY; + config->size_symbol[idx]->flag |= VAR_ENTRY; + config->size_symbol[idx]->refer_to = config->iter_entry; + } + return TRUE; +} + +/* + * Read erase command entry. 
The erase command syntax is: + * + * erase <Symbol>[.member[...]] [size <SizeValue>[K|M]] + * erase <Symbol>[.member[...]] [size <SizeSymbol>] + * erase <Symbol>[.member[...]] [nullify] + */ +static int +read_erase_cmd_entry(struct config *config, int line) +{ + int size, idx; + char *token = get_config_token(NULL, 0, &line, NULL, NULL); + + if (!token || IS_KEYWORD(token)) { + ERRMSG("Config error at %d: expected kernel symbol after" + " 'erase' command.\n", line); + return FALSE; + } + + idx = config->num_filter_symbols; + config->num_filter_symbols++; + size = config->num_filter_symbols * sizeof(struct config_entry *); + config->filter_symbol = realloc(config->filter_symbol, size); + config->size_symbol = realloc(config->size_symbol, size); + + if (!config->filter_symbol || !config->size_symbol) { + ERRMSG("Can't get memory to read config symbols.\n"); + return FALSE; + } + config->filter_symbol[idx] = NULL; + config->size_symbol[idx] = NULL; + + config->filter_symbol[idx] = + create_config_entry(token, FILTER_ENTRY, line); + if (!config->filter_symbol[idx]) { + ERRMSG("Error at line %d: Failed to read filter symbol\n", + line); + return FALSE; + } + + /* + * Save the symbol expression string for generation of eraseinfo data + * later while writing dumpfile. 
+ */ + config->filter_symbol[idx]->symbol_expr = strdup(token); + + if (config->iter_entry) { + if (strcmp(config->filter_symbol[idx]->name, + config->iter_entry->name)) { + ERRMSG("Config error at %d: unused iteration" + " variable '%s'.\n", line, + config->iter_entry->name); + return FALSE; + } + config->filter_symbol[idx]->flag &= ~SYMBOL_ENTRY; + config->filter_symbol[idx]->flag |= VAR_ENTRY; + config->filter_symbol[idx]->refer_to = config->iter_entry; + } + if (get_config_token("nullify", 0, &line, NULL, NULL)) { + config->filter_symbol[idx]->nullify = 1; + + } else if (get_config_token("size", 0, &line, NULL, NULL)) { + if (!read_size_entry(config, line, idx)) + return FALSE; + } + return TRUE; +} + +static int +add_traversal_entry(struct config_entry *ce, char *member, int line) +{ + if (!ce) + return FALSE; + + while (ce->next) + ce = ce->next; + + ce->next = create_config_entry(member, LIST_ENTRY, line); + if (ce->next == NULL) { + ERRMSG("Error at line %d: Failed to read 'via' member\n", + line); + return FALSE; + } + + ce->next->flag |= TRAVERSAL_ENTRY; + ce->next->flag &= ~SYMBOL_ENTRY; + return TRUE; +} + +static int +read_list_entry(struct config *config, int line) +{ + char *token = get_config_token(NULL, 0, &line, NULL, NULL); + + if (!token || IS_KEYWORD(token)) { + ERRMSG("Config error at %d: expected list symbol after" + " 'in' keyword.\n", line); + return FALSE; + } + config->list_entry = create_config_entry(token, LIST_ENTRY, line); + if (!config->list_entry) { + ERRMSG("Error at line %d: Failed to read list symbol\n", + line); + return FALSE; + } + /* Check if user has provided 'via' or 'within' keyword */ + if (get_config_token("via", 0, &line, NULL, NULL)) { + /* next token is traversal member NextMember */ + token = get_config_token(NULL, 0, &line, NULL, NULL); + if (!token) { + ERRMSG("Config error at %d: expected member name after" + " 'via' keyword.\n", line); + return FALSE; + } + if (!add_traversal_entry(config->list_entry, token, 
line)) + return FALSE; + } + else if (get_config_token("within", 0, &line, NULL, NULL)) { + char *s_name, *lh_member; + /* next value is StructName:ListHeadMember */ + s_name = get_config_token(NULL, 0, &line, NULL, NULL); + if (!s_name || IS_KEYWORD(s_name)) { + ERRMSG("Config error at %d: expected struct name after" + " 'within' keyword.\n", line); + return FALSE; + } + lh_member = strchr(s_name, ':'); + if (lh_member) { + *lh_member++ = '\0'; + if (!strlen(lh_member)) { + ERRMSG("Config error at %d: expected list_head" + " member after ':'.\n", line); + return FALSE; + } + config->iter_entry->next = + create_config_entry(lh_member, + ITERATION_ENTRY, line); + if (!config->iter_entry->next) + return FALSE; + config->iter_entry->next->flag &= ~SYMBOL_ENTRY; + } + if (!strlen(s_name)) { + ERRMSG("Config error at %d: Invalid token found " + "after 'within' keyword.\n", line); + return FALSE; + } + config->iter_entry->type_name = strdup(s_name); + } + return TRUE; +} + +/* + * Read the iteration entry (LoopConstruct). The syntax is: + * + * for <id> in {<ArrayVar> | + * <StructVar> via <NextMember> | + * <ListHeadVar> within <StructName>:<ListHeadMember>} + * erase <id>[.MemberExpression] [size <SizeExpression>|nullify] + * [erase <id>...] + * [...] 
+ * endfor + */ +static int +read_iteration_entry(struct config *config, int line) +{ + int eof = NOT_REACH_END; + char *token = get_config_token(NULL, 0, &line, NULL, NULL); + + if (!token || IS_KEYWORD(token)) { + ERRMSG("Config error at %d: expected iteration VAR entry after" + " 'for' keyword.\n", line); + return FALSE; + } + config->iter_entry = + create_config_entry(token, ITERATION_ENTRY, line); + if (!config->iter_entry) { + ERRMSG("Error at line %d: " + "Failed to read iteration VAR entry.\n", line); + return FALSE; + } + if (!get_config_token("in", 0, &line, NULL, NULL)) { + char *token; + token = get_config_token(NULL, 0, &line, NULL, NULL); + if (token) + ERRMSG("Config error at %d: Invalid token '%s'.\n", + line, token); + ERRMSG("Config error at %d: expected token 'in'.\n", line); + return FALSE; + } + if (!read_list_entry(config, line)) + return FALSE; + + while (!get_config_token("endfor", 0, &line, NULL, &eof) && !eof) { + if (get_config_token("erase", 0, &line, NULL, NULL)) { + if (!read_erase_cmd_entry(config, line)) + return FALSE; + } else { + token = get_config_token(NULL, 0, &line, NULL, NULL); + ERRMSG("Config error at %d: " + "Invalid token '%s'.\n", line, token); + return FALSE; + } + } + if (eof != NOT_REACH_END) { + ERRMSG("Config error at %d: No matching 'endfor' found.\n", + line); + return FALSE; + } + return TRUE; +} + +/* + * Configuration file 'makedumpfile.conf' contains filter commands. + * Every individual filter command is considered as a config entry. + * A config entry can be provided on a single line or multiple lines. 
+ */ +static struct config * +get_config(int skip) +{ + struct config *config; + char *token = NULL; + static int line_count = 0; + char *cur_module = NULL; + int eof = NOT_REACH_END; + unsigned char flag = CONFIG_NEW_CMD; + + if (skip) + flag |= CONFIG_SKIP_SECTION; + + if ((config = calloc(1, sizeof(struct config))) == NULL) + return NULL; + + if (get_config_token("erase", flag, &line_count, &cur_module, &eof)) { + if (cur_module) + config->module_name = strdup(cur_module); + + if (!read_erase_cmd_entry(config, line_count)) + goto err_out; + + } else if (get_config_token("for", 0, &line_count, &cur_module, &eof)) { + if (cur_module) + config->module_name = strdup(cur_module); + + if (!read_iteration_entry(config, line_count)) + goto err_out; + } else { + if (eof == NOT_REACH_END) { + token = get_config_token(NULL, 0, &line_count, + NULL, NULL); + ERRMSG("Config error at %d: Invalid token '%s'.\n", + line_count, token); + } + goto err_out; + } + return config; +err_out: + if (config) + free_config(config); + return NULL; +} + +static unsigned long +read_pointer_value(unsigned long long vaddr) +{ + unsigned long val; + + if (!readmem(VADDR, vaddr, &val, sizeof(val))) { + ERRMSG("Can't read pointer value\n"); + return 0; + } + return val; +} + +static long +get_strlen(unsigned long long vaddr) +{ + char buf[BUFSIZE + 1]; + long len = 0; + + /* + * Determine the string length for 'char' pointer. + * BUFSIZE(1024) is the upper limit for string length. 
+ */ + if (readmem(VADDR, vaddr, buf, BUFSIZE)) { + buf[BUFSIZE] = '\0'; + len = strlen(buf); + } + return len; +} + +static int +resolve_config_entry(struct config_entry *ce, unsigned long long base_vaddr, + char *base_struct_name) +{ + unsigned long long symbol; + + if (ce->flag & SYMBOL_ENTRY) { + /* find the symbol info */ + if (!ce->name) + return FALSE; + + /* + * If we are looking for module symbol then traverse through + * mod_st.modules for symbol lookup + */ + if (sym_in_module(ce->name, &symbol)) + ce->sym_addr = symbol; + else + ce->sym_addr = get_symbol_addr(ce->name); + if (!ce->sym_addr) { + ERRMSG("Config error at %d: Can't find symbol '%s'.\n", + ce->line, ce->name); + return FALSE; + } + ce->sym_addr += get_kaslr_offset(ce->sym_addr); + ce->type_name = get_symbol_type_name(ce->name, + DWARF_INFO_GET_SYMBOL_TYPE, + &ce->size, &ce->type_flag); + if (ce->type_flag & TYPE_ARRAY) { + ce->array_length = get_array_length(ce->name, NULL, + DWARF_INFO_GET_SYMBOL_ARRAY_LENGTH); + if (ce->array_length < 0) + ce->array_length = 0; + } + } else if (ce->flag & VAR_ENTRY) { + /* iteration variable. + * read the value from ce->refer_to + */ + ce->vaddr = ce->refer_to->vaddr; + ce->sym_addr = ce->refer_to->sym_addr; + ce->size = ce->refer_to->size; + ce->type_flag = ce->refer_to->type_flag; + if (!ce->type_name) + ce->type_name = strdup(ce->refer_to->type_name); + + /* This entry has been changed hence next entry needs to + * be resolved accordingly. 
+ */ + if (ce->next) + ce->next->flag &= ~ENTRY_RESOLVED; + return TRUE; + } else { + /* find the member offset */ + ce->offset = get_member_offset(base_struct_name, + ce->name, DWARF_INFO_GET_MEMBER_OFFSET); + ce->sym_addr = base_vaddr + ce->offset; + ce->type_name = get_member_type_name(base_struct_name, + ce->name, DWARF_INFO_GET_MEMBER_TYPE, + &ce->size, &ce->type_flag); + if (ce->type_flag & TYPE_ARRAY) { + ce->array_length = get_array_length(base_struct_name, + ce->name, + DWARF_INFO_GET_MEMBER_ARRAY_LENGTH); + if (ce->array_length < 0) + ce->array_length = 0; + } + } + if (ce->type_name == NULL) { + if (!(ce->flag & SYMBOL_ENTRY)) + ERRMSG("Config error at %d: struct '%s' has no member" + " with name '%s'.\n", + ce->line, base_struct_name, ce->name); + return FALSE; + } + if (!strcmp(ce->type_name, "list_head")) { + ce->type_flag |= TYPE_LIST_HEAD; + /* If this list head expression is a LIST entry then + * mark the next entry as TRAVERSAL_ENTRY, if any. + * Error out if next entry is not a last node. + */ + if ((ce->flag & LIST_ENTRY) && ce->next) { + if (ce->next->next) { + ERRMSG("Config error at %d: Only one traversal" + " entry is allowed for list_head type" + " LIST entry", ce->line); + return FALSE; + } + ce->next->flag |= TRAVERSAL_ENTRY; + } + } + ce->vaddr = ce->sym_addr; + if (ce->size < 0) + ce->size = 0; + if ((ce->flag & LIST_ENTRY) && !ce->next) { + /* This is the last node of LIST entry. + * For the list entry symbol, the allowed data types are: + * Array, Structure Pointer (with 'next' member) and list_head. + * + * If this is a struct or list_head data type then + * create a leaf node entry with 'next' member. 
+ */ + if (((ce->type_flag & (TYPE_BASE | TYPE_ARRAY)) == TYPE_BASE) + && (strcmp(ce->type_name, "void"))) + return FALSE; + + if ((ce->type_flag & TYPE_LIST_HEAD) + || ((ce->type_flag & (TYPE_STRUCT | TYPE_ARRAY)) + == TYPE_STRUCT)) { + if (!(ce->flag & TRAVERSAL_ENTRY)) { + ce->next = create_config_entry("next", + LIST_ENTRY, ce->line); + if (ce->next == NULL) + return FALSE; + + ce->next->flag |= TRAVERSAL_ENTRY; + ce->next->flag &= ~SYMBOL_ENTRY; + } + } + if (ce->flag & TRAVERSAL_ENTRY) { + /* type name of traversal entry should match with + * that of parent node. + */ + if (strcmp(base_struct_name, ce->type_name)) + return FALSE; + } + } + if ((ce->type_flag & (TYPE_ARRAY | TYPE_PTR)) == TYPE_PTR) { + /* If it's a pointer variable (not array) then read the + * pointer value. */ + ce->vaddr = read_pointer_value(ce->sym_addr); + + /* + * if it is a void pointer then reset the size to 0 + * User need to provide a size to filter data referenced + * by 'void *' pointer or nullify option. + */ + if (!strcmp(ce->type_name, "void")) + ce->size = 0; + + } + if ((ce->type_flag & TYPE_BASE) && (ce->type_flag & TYPE_PTR) + && !(ce->type_flag & TYPE_ARRAY)) { + if (!strcmp(ce->type_name, "char")) + ce->size = get_strlen(ce->vaddr); + } + if (!ce->next && (ce->flag & SIZE_ENTRY)) { + void *val; + + /* leaf node of size entry */ + /* If it is size argument then update the size with data + * value of this symbol/member. + * Check if current symbol/member is of base data type. 
+ */ + + if (((ce->type_flag & (TYPE_ARRAY | TYPE_BASE)) != TYPE_BASE) + || (ce->size > sizeof(long))) { + ERRMSG("Config error at %d: size symbol/member '%s' " + "is not of base type.\n", ce->line, ce->name); + return FALSE; + } + if ((val = calloc(1, ce->size)) == NULL) { + ERRMSG("Can't get memory for size parameter\n"); + return FALSE; + } + + if (!readmem(VADDR, ce->vaddr, val, ce->size)) { + ERRMSG("Can't read symbol/member data value\n"); + return FALSE; + } + switch (ce->size) { + case 1: + ce->size = (long)(*((uint8_t *)val)); + break; + case 2: + ce->size = (long)(*((uint16_t *)val)); + break; + case 4: + ce->size = (long)(*((uint32_t *)val)); + break; + case 8: + ce->size = (long)(*((uint64_t *)val)); + break; + } + free(val); + } + ce->flag |= ENTRY_RESOLVED; + if (ce->next) + ce->next->flag &= ~ENTRY_RESOLVED; + return TRUE; +} + +static unsigned long long +get_config_symbol_addr(struct config_entry *ce, + unsigned long long base_vaddr, + char *base_struct_name) +{ + if (!(ce->flag & ENTRY_RESOLVED)) { + if (!resolve_config_entry(ce, base_vaddr, base_struct_name)) + return 0; + } + + if (ce->next && ce->vaddr) { + /* Populate nullify flag down the list */ + ce->next->nullify = ce->nullify; + return get_config_symbol_addr(ce->next, ce->vaddr, + ce->type_name); + } else if (!ce->next && ce->nullify) { + /* nullify is applicable to pointer type */ + if (ce->type_flag & TYPE_PTR) + return ce->sym_addr; + else + return 0; + } else + return ce->vaddr; +} + +static long +get_config_symbol_size(struct config_entry *ce, + unsigned long long base_vaddr, + char *base_struct_name) +{ + if (!(ce->flag & ENTRY_RESOLVED)) { + if (!resolve_config_entry(ce, base_vaddr, base_struct_name)) + return 0; + } + + if (ce->next && ce->vaddr) + return get_config_symbol_size(ce->next, ce->vaddr, + ce->type_name); + else { + if (ce->type_flag & TYPE_ARRAY) { + if (ce->type_flag & TYPE_PTR) + return ce->array_length * get_pointer_size(); + else + return ce->array_length * 
ce->size; + } + return ce->size; + } +} + +static int +get_next_list_entry(struct config_entry *ce, unsigned long long base_vaddr, + char *base_struct_name, struct config_entry *out_ce) +{ + unsigned long vaddr = 0; + + /* This function only deals with LIST_ENTRY config entry. */ + if (!(ce->flag & LIST_ENTRY)) + return FALSE; + + if (!(ce->flag & ENTRY_RESOLVED)) { + if (!resolve_config_entry(ce, base_vaddr, base_struct_name)) + return FALSE; + } + + if (!ce->next) { + /* leaf node. */ + if (ce->type_flag & TYPE_ARRAY) { + if (ce->index == ce->array_length) + return FALSE; + + if (ce->type_flag & TYPE_PTR) { + /* Array of pointers. + * + * Array may contain NULL pointers at some + * indexes. Hence jump to the next non-null + * address value. + */ + while (ce->index < ce->array_length) { + vaddr = read_pointer_value(ce->vaddr + + (ce->index * get_pointer_size())); + if (vaddr) + break; + ce->index++; + } + if (ce->index == ce->array_length) + return FALSE; + out_ce->sym_addr = ce->vaddr + (ce->index * + get_pointer_size()); + out_ce->vaddr = vaddr; + if (!strcmp(ce->type_name, "char")) + out_ce->size = get_strlen(vaddr); + else + out_ce->size = ce->size; + } else { + out_ce->sym_addr = ce->vaddr + + (ce->index * ce->size); + out_ce->vaddr = out_ce->sym_addr; + out_ce->size = ce->size; + } + ce->index++; + } else { + if (ce->vaddr == ce->cmp_addr) + return FALSE; + + out_ce->vaddr = ce->vaddr; + /* Set the leaf node as unresolved, so that + * it will be resolved every time when + * get_next_list_entry is called untill + * it hits the exit condiftion. + */ + ce->flag &= ~ENTRY_RESOLVED; + } + return TRUE; + + } else if ((ce->next->next == NULL) && + !(ce->next->type_flag & TYPE_ARRAY)) { + /* the next node is leaf node. for non-array element + * Set the sym_addr and addr of this node with that of + * leaf node. 
+ */ + if (!(ce->type_flag & TYPE_LIST_HEAD)) { + if (!ce->vaddr || ce->vaddr == ce->next->cmp_addr) + return FALSE; + + if (!ce->next->cmp_addr) { + /* safeguard against circular + * link-list + */ + ce->next->cmp_addr = ce->vaddr; + } + out_ce->vaddr = ce->vaddr; + out_ce->sym_addr = ce->sym_addr; + out_ce->size = ce->size; + + ce->sym_addr = ce->next->sym_addr; + ce->vaddr = ce->next->vaddr; + + /* Force resolution of traversal node */ + if (ce->vaddr && !resolve_config_entry(ce->next, + ce->vaddr, ce->type_name)) + return FALSE; + + return TRUE; + } else { + ce->sym_addr = ce->next->sym_addr; + ce->vaddr = ce->next->vaddr; + } + } + + if (ce->next && ce->vaddr) + return get_next_list_entry(ce->next, ce->vaddr, + ce->type_name, out_ce); + return FALSE; +} + +static int +resolve_list_entry(struct config_entry *ce, unsigned long long base_vaddr, + char *base_struct_name, char **out_type_name, + unsigned char *out_type_flag) +{ + if (!(ce->flag & ENTRY_RESOLVED)) { + if (!resolve_config_entry(ce, base_vaddr, base_struct_name)) + return FALSE; + } + + if (ce->next && (ce->next->flag & TRAVERSAL_ENTRY) && + (ce->type_flag & TYPE_ARRAY)) { + /* + * We are here because user has provided + * traversal member for ArrayVar using 'via' keyword. + * + * Print warning and continue. + */ + ERRMSG("Warning: line %d: 'via' keyword not required " + "for ArrayVar.\n", ce->next->line); + free_config_entry(ce->next); + ce->next = NULL; + } + if ((ce->type_flag & TYPE_LIST_HEAD) && ce->next && + (ce->next->flag & TRAVERSAL_ENTRY)) { + /* set cmp_addr for list empty condition. */ + ce->next->cmp_addr = ce->sym_addr; + } + if (ce->next && ce->vaddr) { + return resolve_list_entry(ce->next, ce->vaddr, + ce->type_name, out_type_name, out_type_flag); + } + else { + ce->index = 0; + if (out_type_name) + *out_type_name = ce->type_name; + if (out_type_flag) + *out_type_flag = ce->type_flag; + } + return TRUE; +} + +/* + * Insert the filter info node using insertion sort. 
+ * If filter node for a given paddr is aready present then update the size + * and delete the fl_info node passed. + * + * Return 1 on successfull insertion. + * Return 0 if filter node with same paddr is found. + */ +static int +insert_filter_info(struct filter_info *fl_info) +{ + struct filter_info *prev = NULL; + struct filter_info *ptr = filter_info; + + if (!ptr) { + filter_info = fl_info; + return 1; + } + + while (ptr) { + if (fl_info->paddr <= ptr->paddr) + break; + prev = ptr; + ptr = ptr->next; + } + if (ptr && (fl_info->paddr == ptr->paddr)) { + if (fl_info->size > ptr->size) + ptr->size = fl_info->size; + free(fl_info); + return 0; + } + + if (prev) { + fl_info->next = ptr; + prev->next = fl_info; + } + else { + fl_info->next = filter_info; + filter_info = fl_info; + } + return 1; +} + +/* + * Create an erase info node for each erase command. One node per erase + * command even if it is part of loop construct. + * For erase commands that are not part of loop construct, the num_sizes will + * always be 1 + * For erase commands that are part of loop construct, the num_sizes may be + * 1 or >1 depending on number iterations. This function will called multiple + * times depending on iterations. At first invokation create a node and + * increment num_sizes for subsequent invokations. + * + * The valid erase info node starts from index value 1. (index 0 is invalid + * index). + * + * Index 0 1 2 3 + * +------+--------+--------+--------+ + * erase_info->|Unused| | | |...... + * +------+--------+--------+--------+ + * | . . ..... + * V + * +---------+ + * | char* |----> Original erase command string + * +---------+ + * |num_sizes| + * +---------+ +--+--+--+ + * | sizes |----> | | | |... Sizes array of num_sizes + * +---------+ +--+--+--+ + * + * On success, return the index value of erase node for given erase command. + * On failure, return 0. 
+ */ +static int +add_erase_info_node(struct config_entry *filter_symbol) +{ + int idx = filter_symbol->erase_info_idx; + + /* + * Check if node is already created, if yes, increment the num_sizes. + */ + if (idx) { + erase_info[idx].num_sizes++; + return idx; + } + + /* Allocate a new node. */ + DEBUG_MSG("Allocating new erase info node for command \"%s\"\n", + filter_symbol->symbol_expr); + idx = num_erase_info++; + erase_info = realloc(erase_info, + sizeof(struct erase_info) * num_erase_info); + if (!erase_info) { + ERRMSG("Can't get memory to create erase information.\n"); + return 0; + } + + memset(&erase_info[idx], 0, sizeof(struct erase_info)); + erase_info[idx].symbol_expr = filter_symbol->symbol_expr; + erase_info[idx].num_sizes = 1; + + filter_symbol->symbol_expr = NULL; + filter_symbol->erase_info_idx = idx; + + return idx; +} + +/* Return the index value in sizes array for given erase command index. */ +static inline int +get_size_index(int ei_idx) +{ + if (ei_idx) + return erase_info[ei_idx].num_sizes - 1; + return 0; +} + +static int +update_filter_info(struct config_entry *filter_symbol, + struct config_entry *size_symbol) +{ + unsigned long long sym_addr; + long size; + struct filter_info *fl_info; + + sym_addr = get_config_symbol_addr(filter_symbol, 0, NULL); + if (message_level & ML_PRINT_DEBUG_MSG) + print_config_entry(filter_symbol); + if (!sym_addr) + return FALSE; + + if (filter_symbol->nullify) + size = get_pointer_size(); + else if (size_symbol) { + size = get_config_symbol_size(size_symbol, 0, NULL); + if (message_level & ML_PRINT_DEBUG_MSG) + print_config_entry(size_symbol); + } else + size = get_config_symbol_size(filter_symbol, 0, NULL); + + if (size <= 0) + return FALSE; + + if ((fl_info = calloc(1, sizeof(struct filter_info))) == NULL) { + ERRMSG("Can't allocate filter info\n"); + return FALSE; + } + fl_info->vaddr = sym_addr; + fl_info->paddr = vaddr_to_paddr(sym_addr); + fl_info->size = size; + fl_info->nullify = 
filter_symbol->nullify; + fl_info->erase_ch = 'X'; + + if (insert_filter_info(fl_info)) { + fl_info->erase_info_idx = add_erase_info_node(filter_symbol); + fl_info->size_idx = get_size_index(fl_info->erase_info_idx); + } + return TRUE; +} + +int +update_filter_info_raw(unsigned long long sym_addr, int ch, int len) +{ + struct filter_info *fl_info; + + fl_info = calloc(1, sizeof(struct filter_info)); + if (fl_info == NULL) { + ERRMSG("Can't allocate filter info\n"); + return FALSE; + } + + fl_info->vaddr = sym_addr; + fl_info->paddr = vaddr_to_paddr(sym_addr); + fl_info->size = len; + fl_info->nullify = 0; + fl_info->erase_ch = ch; + + if (insert_filter_info(fl_info)) { + /* TODO + * Add support to update erase information to the + * resulting dump file + */ + fl_info->erase_info_idx = 0; + fl_info->size_idx = 0; + } + return TRUE; +} + +static int +initialize_iteration_entry(struct config_entry *ie, + char *type_name, unsigned char type_flag) +{ + if (!(ie->flag & ITERATION_ENTRY)) + return FALSE; + + if (type_flag & TYPE_LIST_HEAD) { + if (!ie->type_name) { + ERRMSG("Config error at %d: Use 'within' keyword " + "to specify StructName:ListHeadMember.\n", + ie->line); + return FALSE; + } + /* + * If the LIST entry is of list_head type and user has not + * specified the member name where iteration entry is hooked + * on to list_head, then we default to member name 'list'. + */ + if (!ie->next) { + ie->next = create_config_entry("list", ITERATION_ENTRY, + ie->line); + ie->next->flag &= ~SYMBOL_ENTRY; + } + + /* + * For list_head find out the size of the StructName and + * populate ie->size now. For array and link list we get the + * size info from config entry returned by + * get_next_list_entry(). 
+ */ + ie->size = get_structure_size(ie->type_name, 0); + if (ie->size == FAILED_DWARFINFO) { + ERRMSG("Config error at %d: " + "Can't get size for type: %s.\n", + ie->line, ie->type_name); + return FALSE; + + } else if (ie->size == NOT_FOUND_STRUCTURE) { + ERRMSG("Config error at %d: " + "Can't find structure: %s.\n", + ie->line, ie->type_name); + return FALSE; + } + + if (!resolve_config_entry(ie->next, 0, ie->type_name)) + return FALSE; + + if (strcmp(ie->next->type_name, "list_head")) { + ERRMSG("Config error at %d: " + "Member '%s' is not of 'list_head' type.\n", + ie->next->line, ie->next->name); + return FALSE; + } + ie->type_flag = TYPE_STRUCT; + } else { + if (ie->type_name) { + /* looks like user has used 'within' keyword for + * non-list_head VAR. Print the warning and continue. + */ + ERRMSG("Warning: line %d: 'within' keyword not " + "required for ArrayVar/StructVar.\n", ie->line); + free(ie->type_name); + + /* remove the next list_head member from iteration + * entry that would have added as part of 'within' + * keyword processing. + */ + if (ie->next) { + free_config_entry(ie->next); + ie->next = NULL; + } + } + /* + * Set type flag for iteration entry. The iteration entry holds + * individual element from array/list, hence strip off the + * array type flag bit. + */ + ie->type_name = strdup(type_name); + ie->type_flag = type_flag; + ie->type_flag &= ~TYPE_ARRAY; + } + return TRUE; +} + +static int +list_entry_empty(struct config_entry *le, struct config_entry *ie) +{ + struct config_entry ce; + + /* Error out if arguments are not correct */ + if (!(le->flag & LIST_ENTRY) || !(ie->flag & ITERATION_ENTRY)) { + ERRMSG("Invalid arguments\n"); + return TRUE; + } + + memset(&ce, 0, sizeof(struct config_entry)); + /* get next available entry from LIST entry. 
*/ + if (!get_next_list_entry(le, 0, NULL, &ce)) + return TRUE; + + if (ie->next) { + /* we are dealing with list_head */ + ie->next->vaddr = ce.vaddr; + ie->vaddr = ce.vaddr - ie->next->offset; + } else { + ie->vaddr = ce.vaddr; + ie->sym_addr = ce.sym_addr; + ie->size = ce.size; + } + return FALSE; +} + +/* + * Process the config entry that has been read by get_config. + * return TRUE on success + */ +static int +process_config(struct config *config) +{ + int i; + unsigned char type_flag; + char *type_name = NULL; + + if (config->list_entry) { + /* + * We are dealing with 'for' command. + * - First resolve list entry. + * - Initialize iteration entry for iteration. + * - Populate iteration entry untill list entry empty. + */ + if (!resolve_list_entry(config->list_entry, 0, NULL, + &type_name, &type_flag)) { + return FALSE; + } + if (!initialize_iteration_entry(config->iter_entry, + type_name, type_flag)) { + return FALSE; + } + + while (!list_entry_empty(config->list_entry, + config->iter_entry)) { + for (i = 0; i < config->num_filter_symbols; i++) + update_filter_info(config->filter_symbol[i], + config->size_symbol[i]); + } + } else + update_filter_info(config->filter_symbol[0], + config->size_symbol[0]); + + return TRUE; +} + +static void +print_filter_info() +{ + struct filter_info *fl_info = filter_info; + + DEBUG_MSG("\n"); + while (fl_info) { + DEBUG_MSG("filter address: paddr (%llx), sym_addr (%llx)," + " Size (%ld)\n", + fl_info->paddr, fl_info->vaddr, fl_info->size); + fl_info = fl_info->next; + } +} + +static void +init_filter_config() +{ + filter_config.name_filterconfig = info->name_filterconfig; + filter_config.file_filterconfig = info->file_filterconfig; + filter_config.saved_token = NULL; + filter_config.token = NULL; + filter_config.cur_module = NULL; + filter_config.new_section = 0; + filter_config.line_count = 0; +} + +/* + * Read and process each config entry (filter commands) from filter config + * file. 
If no module debuginfo found for specified module section then skip + * to next module section. + */ +static int +process_config_file(const char *name_config) +{ + struct config *config; + int skip_section = 0; + + if (!name_config) + return FALSE; + + if ((info->file_filterconfig = fopen(name_config, "r")) == NULL) { + ERRMSG("Can't open config file(%s). %s\n", + name_config, strerror(errno)); + return FALSE; + } + + init_filter_config(); + + while((config = get_config(skip_section)) != NULL) { + skip_section = 0; + if (config->module_name && + strcmp(config->module_name, "vmlinux")) { + /* + * if Module debuginfo is not available, then skip to + * next module section. + */ + if (!set_dwarf_debuginfo(config->module_name, + info->system_utsname.release, NULL, -1)) { + ERRMSG("Skipping to next Module section\n"); + skip_section = 1; + free_config(config); + continue; + } + } else { + set_dwarf_debuginfo("vmlinux", NULL, + info->name_vmlinux, info->fd_vmlinux); + } + process_config(config); + free_config(config); + } + + fclose(info->file_filterconfig); + print_filter_info(); + return TRUE; +} + +/* + * Search for symbol in modules as well as vmlinux + */ +unsigned long long +get_symbol_addr_all(char *name) { + + short vmlinux_searched = 0; + unsigned long long symbol_addr = 0; + unsigned int i, current_mod; + struct module_info *modules; + + /* Search in vmlinux if debuginfo is set to vmlinux */ + if (!strcmp(get_dwarf_module_name(), "vmlinux")) { + symbol_addr = get_symbol_addr(name); + if (symbol_addr) + return symbol_addr; + + vmlinux_searched = 1; + } + + /* + * Proceed the search in modules. 
Try in the module + * which resulted in a hit in the previous search + */ + + modules = mod_st.modules; + current_mod = mod_st.current_mod; + + if (strcmp(get_dwarf_module_name(), modules[current_mod].name)) { + if (!set_dwarf_debuginfo(modules[current_mod].name, + info->system_utsname.release, NULL, -1)) { + ERRMSG("Cannot set to current module %s\n", + modules[current_mod].name); + return NOT_FOUND_SYMBOL; + } + } + + symbol_addr = find_module_symbol(&modules[current_mod], name); + if (symbol_addr) + return symbol_addr; + + /* Search in all modules */ + for (i = 0; i < mod_st.num_modules; i++) { + + /* Already searched. Skip */ + if (i == current_mod) + continue; + + if (!set_dwarf_debuginfo(modules[i].name, + info->system_utsname.release, NULL, -1)) { + ERRMSG("Skipping Module section %s\n", modules[i].name); + continue; + } + + symbol_addr = find_module_symbol(&modules[i], name); + + if (!symbol_addr) + continue; + + /* + * Symbol found. Set the current_mod to this module index, a + * minor optimization for fast lookup next time + */ + mod_st.current_mod = i; + return symbol_addr; + } + + /* Symbol not found in any module. Set debuginfo back to vmlinux */ + set_dwarf_debuginfo("vmlinux", NULL, info->name_vmlinux, + info->fd_vmlinux); + + /* + * Search vmlinux if not already searched. 
This can happen when + * this function is called with debuginfo set to a particular + * kernel module and we are looking for symbol in vmlinux + */ + if (!vmlinux_searched) + return get_symbol_addr(name); + else + return NOT_FOUND_SYMBOL; +} + + +/* + * Search for domain in modules as well as vmlinux + */ +long +get_domain_all(char *symname, int cmd, unsigned long long *die) { + + short vmlinux_searched = 0; + long size = 0; + unsigned int i, current_mod; + struct module_info *modules; + + /* Search in vmlinux if debuginfo is set to vmlinux */ + if (!strcmp(get_dwarf_module_name(), "vmlinux")) { + size = get_domain(symname, cmd, die); + if (size > 0 && die) + return size; + + vmlinux_searched = 1; + } + + /* + * Proceed the search in modules. Try in the module + * which resulted in a hit in the previous search + */ + + modules = mod_st.modules; + current_mod = mod_st.current_mod; + + if (strcmp(get_dwarf_module_name(), modules[current_mod].name)) { + if (!set_dwarf_debuginfo(modules[current_mod].name, + info->system_utsname.release, NULL, -1)) { + ERRMSG("Cannot set to current module %s\n", + modules[current_mod].name); + return NOT_FOUND_STRUCTURE; + } + } + + size = get_domain(symname, cmd, die); + if (size > 0 && die) + return size; + + /* Search in all modules */ + for (i = 0; i < mod_st.num_modules; i++) { + + /* Already searched. Skip */ + if (i == current_mod) + continue; + + if (!set_dwarf_debuginfo(modules[i].name, + info->system_utsname.release, NULL, -1)) { + ERRMSG("Skipping Module section %s\n", modules[i].name); + continue; + } + + size = get_domain(symname, cmd, die); + + if (size <= 0 || !die) + continue; + + /* + * Domain found. Set the current_mod to this module index, a + * minor optimization for fast lookup next time + */ + mod_st.current_mod = i; + return size; + } + + /* Domain not found in any module. 
Set debuginfo back to vmlinux */ + set_dwarf_debuginfo("vmlinux", NULL, info->name_vmlinux, + info->fd_vmlinux); + + if (!vmlinux_searched) + return get_domain(symname, cmd, die); + else + return NOT_FOUND_STRUCTURE; +} + +/* + * Search for die in modules as well as vmlinux + */ +int +get_die_nfields_all(unsigned long long die_off) +{ + short vmlinux_searched = 0; + long nfields = -1; + unsigned int i, current_mod; + struct module_info *modules; + + /* Search in vmlinux if debuginfo is set to vmlinux */ + if (!strcmp(get_dwarf_module_name(), "vmlinux")) { + nfields = get_die_nfields(die_off); + if (nfields > 0) + return nfields; + + vmlinux_searched = 1; + } + + /* + * Proceed the search in modules. Try in the module + * which resulted in a hit in the previous search + */ + + modules = mod_st.modules; + current_mod = mod_st.current_mod; + + if (strcmp(get_dwarf_module_name(), modules[current_mod].name)) { + if (!set_dwarf_debuginfo(modules[current_mod].name, + info->system_utsname.release, NULL, -1)) { + ERRMSG("Cannot set to current module %s\n", + modules[current_mod].name); + return -1; + } + } + + nfields = get_die_nfields(die_off); + if (nfields > 0) + return nfields; + + /* Search in all modules */ + for (i = 0; i < mod_st.num_modules; i++) { + + /* Already searched. Skip */ + if (i == current_mod) + continue; + + if (!set_dwarf_debuginfo(modules[i].name, + info->system_utsname.release, NULL, -1)) { + ERRMSG("Skipping Module section %s\n", modules[i].name); + continue; + } + + nfields = get_die_nfields(die_off); + + if (nfields < 0) + continue; + + /* + * Die found. Set the current_mod to this module index, + * a minor optimization for fast lookup next time + */ + mod_st.current_mod = i; + return nfields; + } + + /* Die not found in any module. 
Set debuginfo back to vmlinux */ + set_dwarf_debuginfo("vmlinux", NULL, info->name_vmlinux, + info->fd_vmlinux); + + if (!vmlinux_searched) + return get_die_nfields(die_off); + else + return -1; + +} + +/* + * Search for die member in modules as well as vmlinux + */ +int +get_die_member_all(unsigned long long die_off, int index, long *offset, + char **name, int *nbits, int *fbits, unsigned long long *m_die) +{ + short vmlinux_searched = 0; + long size = -1; + unsigned int i, current_mod; + struct module_info *modules; + + /* Search in vmlinux if debuginfo is set to vmlinux */ + if (!strcmp(get_dwarf_module_name(), "vmlinux")) { + size = get_die_member(die_off, index, offset, name, + nbits, fbits, m_die); + if (size >= 0) + return size; + + vmlinux_searched = 1; + } + + /* + * Proceed the search in modules. Try in the module + * which resulted in a hit in the previous search + */ + + modules = mod_st.modules; + current_mod = mod_st.current_mod; + + if (strcmp(get_dwarf_module_name(), modules[current_mod].name)) { + if (!set_dwarf_debuginfo(modules[current_mod].name, + info->system_utsname.release, NULL, -1)) { + ERRMSG("Cannot set to current module %s\n", + modules[current_mod].name); + return -1; + } + } + + size = get_die_member(die_off, index, offset, name, + nbits, fbits, m_die); + if (size >= 0) + return size; + + /* Search in all modules */ + for (i = 0; i < mod_st.num_modules; i++) { + + /* Already searched. Skip */ + if (i == current_mod) + continue; + + if (!set_dwarf_debuginfo(modules[i].name, + info->system_utsname.release, NULL, -1)) { + ERRMSG("Skipping Module section %s\n", modules[i].name); + continue; + } + + size = get_die_member(die_off, index, offset, name, + nbits, fbits, m_die); + + if (size < 0) + continue; + + /* + * Die member found. Set the current_mod to this module index, + * a minor optimization for fast lookup next time + */ + mod_st.current_mod = i; + return size; + } + + /* Die member not found in any module. 
Set debuginfo back to vmlinux */ + set_dwarf_debuginfo("vmlinux", NULL, info->name_vmlinux, + info->fd_vmlinux); + + if (!vmlinux_searched) + return get_die_member(die_off, index, offset, name, + nbits, fbits, m_die); + else + return -1; +} + +/* Process the eppic macro using eppic library */ +static int +process_eppic_file(char *name_config) +{ + void *handle; + void (*eppic_load)(char *), (*eppic_unload)(char *); + int (*eppic_init)(); + + /* + * Dynamically load the eppic_makedumpfile.so library. + */ + handle = dlopen("eppic_makedumpfile.so", RTLD_LAZY); + if (!handle) { + ERRMSG("dlopen failed: %s\n", dlerror()); + return FALSE; + } + + /* TODO + * Support specifying eppic macros in makedumpfile.conf file + */ + + eppic_init = dlsym(handle, "eppic_init"); + if (!eppic_init) { + ERRMSG("Could not find eppic_init function\n"); + return FALSE; + } + + eppic_load = dlsym(handle, "eppic_load"); + if (!eppic_load) { + ERRMSG("Could not find eppic_load function\n"); + return FALSE; + } + + eppic_unload = dlsym(handle, "eppic_unload"); + if (!eppic_unload) + ERRMSG("Could not find eppic_unload function\n"); + + if (eppic_init(&eppic_cb)) { + ERRMSG("Init failed \n"); + return FALSE; + } + + /* Load/compile, execute and unload the eppic macro */ + eppic_load(name_config); + eppic_unload(name_config); + + if (dlclose(handle)) + ERRMSG("dlclose failed: %s\n", dlerror()); + + return TRUE; +} + +static void +split_filter_info(struct filter_info *prev, unsigned long long next_paddr, + size_t size) +{ + struct filter_info *new; + + if ((new = calloc(1, sizeof(struct filter_info))) == NULL) { + ERRMSG("Can't allocate memory to split filter info\n"); + return; + } + + /* + * copy over existing data from prev node and only update fields + * that differ. This approach will take care of copying over of any + * future member addition to filter_info structure. 
+ */ + *new = *prev; + new->paddr = next_paddr; + new->size = size; + prev->next = new; +} + +static void +update_erase_info(struct filter_info *fi) +{ + struct erase_info *ei; + + if (!fi->erase_info_idx) + return; + + ei = &erase_info[fi->erase_info_idx]; + + if (!ei->sizes) { + /* First time, allocate sizes array */ + ei->sizes = calloc(ei->num_sizes, sizeof(long)); + if (!ei->sizes) { + ERRMSG("Can't allocate memory for erase info sizes\n"); + return; + } + } + ei->erased = 1; + if (!fi->nullify) + ei->sizes[fi->size_idx] += fi->size; + else + ei->sizes[fi->size_idx] = -1; +} + +static int +extract_filter_info(unsigned long long start_paddr, + unsigned long long end_paddr, + struct filter_info *fl_info) +{ + struct filter_info *fi = filter_info; + struct filter_info *prev = NULL; + size_t size1, size2; + + if (!fl_info) + return FALSE; + + while (fi) { + if ((fi->paddr >= start_paddr) && (fi->paddr < end_paddr)) { + size1 = end_paddr - fi->paddr; + if (fi->size <= size1) + break; + size2 = fi->size - size1; + fi->size = size1; + split_filter_info(fi, fi->paddr + size1, size2); + break; + } + prev = fi; + fi = fi->next; + } + if (!fi) + return FALSE; + + *fl_info = *fi; + fl_info->next = NULL; + + /* Delete this node */ + if (!prev) + filter_info = fi->next; + else + prev->next = fi->next; + update_erase_info(fi); + free(fi); + + return TRUE; +} + +/* + * External functions. + */ +int +gather_filter_info(void) +{ + int ret = TRUE; + + /* + * Before processing filter config file, load the symbol data of + * loaded modules from vmcore. + */ + set_dwarf_debuginfo("vmlinux", NULL, + info->name_vmlinux, info->fd_vmlinux); + if (!load_module_symbols()) + return FALSE; + + /* + * XXX: We support specifying both makedumpfile.conf and + * eppic macro at the same time. 
Whether to retain or discard the + * functionality provided by makedumpfile.conf is open for + * discussion + */ + if (info->name_filterconfig) + ret = process_config_file(info->name_filterconfig); + + if (info->name_eppic_config) + ret &= process_eppic_file(info->name_eppic_config); + + /* + * Remove modules symbol information, we dont need now. + * Reset the dwarf debuginfo to vmlinux to close open file + * descripter of module debuginfo file, if any. + */ + clean_module_symbols(); + set_dwarf_debuginfo("vmlinux", NULL, + info->name_vmlinux, info->fd_vmlinux); + return ret; +} + +void +clear_filter_info(void) +{ + struct filter_info *prev, *fi = filter_info; + int i; + + /* Delete filter_info nodes that are left out. */ + while (fi) { + prev = fi; + fi = fi->next; + free(prev); + } + filter_info = NULL; + + if (erase_info == NULL) + return; + + for (i = 1; i < num_erase_info; i++) { + free(erase_info[i].symbol_expr); + free(erase_info[i].sizes); + } + free(erase_info); + erase_info = NULL; +} + +/* + * Filter buffer if the physical address is in filter_info. + */ +void +filter_data_buffer(unsigned char *buf, unsigned long long paddr, + size_t size) +{ + struct filter_info fl_info; + unsigned char *buf_ptr; + + while (extract_filter_info(paddr, paddr + size, &fl_info)) { + buf_ptr = buf + (fl_info.paddr - paddr); + if (fl_info.nullify) + memset(buf_ptr, 0, fl_info.size); + else + memset(buf_ptr, fl_info.erase_ch, fl_info.size); + } +} + +/* + * Filter buffer if the physical address is in filter_info. 
+ */ +void +filter_data_buffer_parallel(unsigned char *buf, unsigned long long paddr, + size_t size, pthread_mutex_t *mutex) +{ + struct filter_info fl_info; + unsigned char *buf_ptr; + int found = FALSE; + + while (TRUE) { + pthread_mutex_lock(mutex); + found = extract_filter_info(paddr, paddr + size, &fl_info); + pthread_mutex_unlock(mutex); + + if (found) { + buf_ptr = buf + (fl_info.paddr - paddr); + if (fl_info.nullify) + memset(buf_ptr, 0, fl_info.size); + else + memset(buf_ptr, fl_info.erase_ch, fl_info.size); + } else { + break; + } + } +} + +unsigned long +get_size_eraseinfo(void) +{ + unsigned long size_eraseinfo = 0; + char size_str[MAX_SIZE_STR_LEN]; + struct erase_info *ei; + struct filter_info *fl_info = filter_info; + + while (fl_info) { + + if (!fl_info->erase_info_idx) + continue; + ei = &erase_info[fl_info->erase_info_idx]; + if (fl_info->nullify) + sprintf(size_str, "nullify\n"); + else + sprintf(size_str, "size %ld\n", fl_info->size); + + size_eraseinfo += strlen("erase ") + + strlen(ei->symbol_expr) + 1 + + strlen(size_str); + fl_info = fl_info->next; + } + + return size_eraseinfo; +} + diff --git a/erase_info.h b/erase_info.h new file mode 100644 index 0000000..b363a40 --- /dev/null +++ b/erase_info.h @@ -0,0 +1,69 @@ +/* + * erase_info.h + * + * Created by: Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> + * + * Copyright (C) 2011 IBM Corporation + * Copyright (C) 2011 NEC Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ +#ifndef _ERASE_INFO_H +#define _ERASE_INFO_H + +#define MAX_SIZE_STR_LEN (26) + +/* + * Erase information, original symbol expressions. + */ +struct erase_info { + char *symbol_expr; + int num_sizes; + long *sizes; + int erased; /* 1= erased, 0= Not erased */ +}; + +unsigned long long get_symbol_addr_all(char *); +long get_domain_all(char *, int, unsigned long long *); +int get_die_member_all(unsigned long long die_off, int index, long *offset, + char **name, int *nbits, int *fbits, unsigned long long *m_die); +int get_die_nfields_all(unsigned long long die_off); + +struct call_back { + long (*get_domain_all)(char *, int, unsigned long long *); + int (*readmem)(int type_addr, unsigned long long addr, void *bufptr, + size_t size); + int (*get_die_attr_type)(unsigned long long die_off, int *type_flag, + unsigned long long *die_attr_off); + char * (*get_die_name)(unsigned long long die_off); + unsigned long long (*get_die_offset)(char *sysname); + int (*get_die_length)(unsigned long long die_off, int flag); + int (*get_die_member_all)(unsigned long long die_off, int index, + long *offset, char **name, int *nbits, int *fbits, + unsigned long long *m_die); + int (*get_die_nfields_all)(unsigned long long die_off); + unsigned long long (*get_symbol_addr_all)(char *symname); + int (*update_filter_info_raw)(unsigned long long, int, int); +}; + +extern struct erase_info *erase_info; +extern unsigned long num_erase_info; + +int gather_filter_info(void); +void clear_filter_info(void); +void filter_data_buffer(unsigned char *buf, unsigned long long paddr, size_t size); +void filter_data_buffer_parallel(unsigned char *buf, unsigned long long paddr, + size_t size, pthread_mutex_t *mutex); +unsigned long get_size_eraseinfo(void); +int update_filter_info_raw(unsigned long long, int, int); + +#endif /* _ERASE_INFO_H */ + diff --git a/extension_eppic.c b/extension_eppic.c new file mode 100644 index 0000000..45bc032 --- /dev/null +++ b/extension_eppic.c @@ -0,0 +1,468 @@ +/* + * 
extension_eppic.c + * + * Created by: Aravinda Prasad <aravinda@linux.vnet.ibm.com> + * + * Copyright (C) 2012, 2013 IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <fcntl.h> +#include <dwarf.h> + +#include "makedumpfile.h" +#include "extension_eppic.h" + +static int apigetctype(int, char *, type_t *); + +/* + * Most of the functions included in this file performs similar + * functionality as in the applications/crash/eppic.c file part of + * eppic, but uses DWARF instead of gdb. Few of the functions are + * reused directly which are acknowledged in the comment before the + * function. + */ + +/* + * This is the call back function called when a new eppic macro is + * loaded. This will execute the loaded eppic macro. + * + * "fname" is considered as the entry point of an eppic macro only if + * the following functions are defined: + * + * fname_help() + * fname_usage() + * + * These functions have no relevance in makedumpfile context as + * makedumpfile automatically executes the eppic macro by calling the + * entry point and user will not have any option to execute the usage + * or help functions. However they are required to identify the entry + * points in the eppic macro. 
+ */ +void +reg_callback(char *name, int load) +{ + char fname[MAX_SYMNAMELEN]; + + /* Nothing to process for unload request */ + if (!load) + return; + + snprintf(fname, sizeof(fname), "%s_help", name); + if (eppic_chkfname(fname, 0)) { + snprintf(fname, sizeof(fname), "%s_usage", name); + if (eppic_chkfname(fname, 0)) + eppic_cmd(name, NULL, 0); + } + return; +} + +/* + * This function is a copy of eppic_setupidx() function in + * applications/crash/eppic.c file from eppic source code + * repository. + * + * set idx value to actual array indexes from specified size + */ +static void +eppic_setupidx(TYPE_S *t, int ref, int nidx, int *idxlst) +{ + /* put the idxlst in index size format */ + if (nidx) { + int i; + for (i = 0; i < nidx - 1; i++) { + /* kludge for array dimensions of [1] */ + if (idxlst[i + 1] == 0) + idxlst[i + 1] = 1; + idxlst[i] = idxlst[i] / idxlst[i + 1]; + } + + /* divide by element size for last element bound */ + if (ref) + idxlst[i] /= eppic_defbsize(); + else + idxlst[i] /= eppic_type_getsize(t); + eppic_type_setidxlst(t, idxlst); + } +} + +/* + * Call back functions for eppic to query the dump image + */ + +static int +apigetmem(ull iaddr, void *p, int nbytes) +{ + return READMEM(VADDR, iaddr, p, nbytes); +} + +static int +apiputmem(ull iaddr, void *p, int nbytes) +{ + return 1; +} + +/* + * Drill down the type of the member and update eppic with information + * about the member + */ +static char * +drilldown(ull offset, type_t *t) +{ + int type_flag, len = 0, t_len = 0, nidx = 0; + int fctflg = 0, ref = 0, *idxlst = 0; + ull die_off = offset, t_die_off; + char *tstr = NULL, *tstr_dup = NULL; + + while (GET_DIE_ATTR_TYPE(die_off, &type_flag, &t_die_off)) { + switch (type_flag) { + /* typedef inserts a level of reference to the actual type */ + case DW_TAG_pointer_type: + ref++; + die_off = t_die_off; + /* + * This could be a void *, in which case the drill + * down stops here + */ + if (!GET_DIE_ATTR_TYPE(die_off, &type_flag, + &t_die_off)) 
{ + /* make it a char* */ + eppic_parsetype("char", t, ref); + return eppic_strdup(""); + } + break; + /* Handle pointer to function */ + case DW_TAG_subroutine_type: + fctflg = 1; + die_off = t_die_off; + break; + /* Handle arrays */ + case DW_TAG_array_type: + if (!idxlst) { + idxlst = eppic_calloc(sizeof(int) * \ + (MAX_ARRAY_DIMENSION + 1)); + if (!idxlst) { + ERRMSG("Out of memory\n"); + return NULL; + } + } + if (nidx >= MAX_ARRAY_DIMENSION) { + ERRMSG("Too many array indexes. Max=%d\n", + MAX_ARRAY_DIMENSION); + return NULL; + } + + /* handle multi-dimensional array */ + len = GET_DIE_LENGTH(die_off, FALSE); + t_len = GET_DIE_LENGTH(t_die_off, FALSE); + if (len > 0 && t_len > 0) + idxlst[nidx++] = len / t_len; + die_off = t_die_off; + break; + /* Handle typedef */ + case DW_TAG_typedef: + die_off = t_die_off; + break; + case DW_TAG_base_type: + eppic_parsetype(tstr = GET_DIE_NAME(t_die_off), t, 0); + goto out; + case DW_TAG_union_type: + eppic_type_mkunion(t); + goto label; + case DW_TAG_enumeration_type: + eppic_type_mkenum(t); + goto label; + case DW_TAG_structure_type: + eppic_type_mkstruct(t); +label: + eppic_type_setsize(t, GET_DIE_LENGTH(t_die_off, TRUE)); + eppic_type_setidx(t, (ull)t_die_off); + tstr = GET_DIE_NAME(t_die_off); + /* Drill down further */ + if (tstr) + apigetctype(V_STRUCT, tstr, t); + die_off = 0; + break; + /* Unknown TAG ? */ + default: + die_off = t_die_off; + break; + } + } + +out: + eppic_setupidx(t, ref, nidx, idxlst); + if (fctflg) + eppic_type_setfct(t, 1); + eppic_pushref(t, ref + (nidx ? 1 : 0)); + tstr_dup = (tstr) ? eppic_strdup(tstr) : eppic_strdup(""); + /* Free the memory allocated by makedumpfile. */ + free(tstr); + return tstr_dup; +} + +/* + * Get the type, size and position information for a member of a structure. 
+ */ +static char * +apimember(char *mname, ull idx, type_t *tm, member_t *m, ull *last_index) +{ + int index, nfields = -1, size; + int nbits = 0, fbits = 0; + long offset; + ull m_die, die_off = idx; + char *name = NULL; + + nfields = GET_DIE_NFIELDS_ALL(die_off); + /* + * GET_DIE_NFIELDS() returns < 0 if the die is not structure type + * or union type + */ + if (nfields <= 0) + return NULL; + + /* if we're being asked the next member in a getfirst/getnext + * sequence + */ + if (mname && !mname[0] && last_index && (*last_index)) + index = *last_index; + else + index = 0; + + while (index < nfields) { + size = GET_DIE_MEMBER_ALL(die_off, index, &offset, &name, + &nbits, &fbits, &m_die); + + if (size < 0) + return NULL; + + if (!mname || !mname[0] || !strcmp(mname, name)) { + eppic_member_ssize(m, size); + if (name) { + eppic_member_sname(m, name); + /* + * Free the memory allocated by makedumpfile. + */ + free(name); + } + else + eppic_member_sname(m, ""); + eppic_member_soffset(m, offset); + eppic_member_snbits(m, nbits); + eppic_member_sfbit(m, fbits); + *last_index = index + 1; + return drilldown(m_die, tm); + } + index++; + } + return NULL; +} + +static int +apigetctype(int ctype, char *name, type_t *tout) +{ + long size = 0; + unsigned long long die = 0; + + switch (ctype) { + case V_TYPEDEF: + size = GET_DOMAIN_ALL(name, DWARF_INFO_GET_DOMAIN_TYPEDEF, + &die); + break; + case V_STRUCT: + size = GET_DOMAIN_ALL(name, DWARF_INFO_GET_DOMAIN_STRUCT, &die); + break; + case V_UNION: + size = GET_DOMAIN_ALL(name, DWARF_INFO_GET_DOMAIN_UNION, &die); + break; + /* TODO + * Implement for all the domains + */ + } + + if (size <= 0 || !die) + return 0; + + /* populate */ + eppic_type_settype(tout, ctype); + eppic_type_setsize(tout, size); + eppic_type_setidx(tout, (ull)(unsigned long)die); + eppic_pushref(tout, 0); + return 1; +} + +static char * +apigetrtype(ull idx, type_t *t) +{ + return drilldown(idx, t); +} + +static int +apialignment(ull idx) +{ + return 0; +} + 
+int +apigetval(char *name, ull *val, VALUE_S *value) +{ + ull ptr = 0; + + ptr = GET_SYMBOL_ADDR_ALL(name); + + if (!ptr) + return 0; + + *val = ptr; + + if (!value) + return 1; + + /* Support for fully typed symbol access */ + ull type; + TYPE_S *stype; + + type = GET_DIE_OFFSET(name); + stype = eppic_gettype(value); + + apigetrtype(type, stype); + + eppic_pushref(stype, 1); + eppic_setmemaddr(value, *val); + eppic_do_deref(1, value, value); + + *val = eppic_getval(value); + + if (!eppic_typeislocal(stype) && eppic_type_getidx(stype) > 100) { + char *tname = GET_DIE_NAME(eppic_type_getidx(stype)); + if (tname) { + eppic_chktype(stype, tname); + /* Free the memory allocated by makedumpfile. */ + free(tname); + } + } + return 1; +} + +static enum_t * +apigetenum(char *name) +{ + return 0; +} + +static def_t * +apigetdefs(void) +{ + return 0; +} + +static uint8_t +apigetuint8(void *ptr) +{ + uint8_t val; + if (!READMEM(VADDR, (unsigned long)ptr, (char *)&val, sizeof(val))) + return (uint8_t) -1; + return val; +} + +static uint16_t +apigetuint16(void *ptr) +{ + uint16_t val; + if (!READMEM(VADDR, (unsigned long)ptr, (char *)&val, sizeof(val))) + return (uint16_t) -1; + return val; +} + +static uint32_t +apigetuint32(void *ptr) +{ + uint32_t val; + if (!READMEM(VADDR, (unsigned long)ptr, (char *)&val, sizeof(val))) + return (uint32_t) -1; + return val; +} + +static uint64_t +apigetuint64(void *ptr) +{ + uint64_t val; + if (!READMEM(VADDR, (unsigned long)ptr, (char *)&val, sizeof(val))) + return (uint64_t) -1; + return val; +} + +static char * +apifindsym(char *p) +{ + return NULL; +} + +apiops icops = { + apigetmem, + apiputmem, + apimember, + apigetctype, + apigetrtype, + apialignment, + apigetval, + apigetenum, + apigetdefs, + apigetuint8, + apigetuint16, + apigetuint32, + apigetuint64, + apifindsym +}; + +/* Extensions to built-in functions */ +VALUE_S * +eppic_memset(VALUE_S *vaddr, VALUE_S *vch, VALUE_S *vlen) +{ + ull addr = eppic_getval(vaddr); + int len = 
eppic_getval(vlen); + int ch = eppic_getval(vch); + + /* + * Set the value at address from iaddr till iaddr + nbytes + * to the value specified in variable ch + */ + UPDATE_FILTER_INFO_RAW(addr, ch, len); + return eppic_makebtype(1); +} + + +/* Initialize eppic */ +int +eppic_init(void *fun_ptr) +{ + cb = (struct call_back *)fun_ptr; + + if (eppic_open() >= 0) { + + /* Register call back functions */ + eppic_apiset(&icops, 3, sizeof(long), 0); + + /* set the new function callback */ + eppic_setcallback(reg_callback); + + /* Extend built-in functions to include memset */ + eppic_builtin("int memset(char *, int, int)", + (bf_t *)eppic_memset); + + return 0; + } + return 1; +} + diff --git a/extension_eppic.h b/extension_eppic.h new file mode 100644 index 0000000..24189ba --- /dev/null +++ b/extension_eppic.h @@ -0,0 +1,95 @@ +/* + * extension_eppic.h + * + * Created by: Aravinda Prasad <aravinda@linux.vnet.ibm.com> + * + * Copyright (C) 2012, 2013 IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#ifndef _EXTENSION_EPPIC_H +#define _EXTENSION_EPPIC_H + +#include "eppic_api.h" +#include "erase_info.h" + +/* + * MEMBER_S, ENUM_S, DEF_S and TYPE_S are extracts from eppic header + * file eppic.h. The reason for not including the eppic.h header file + * in this file is because, lot of things in eppic.h are not required + * for makedumpfile extension. 
+ */ + +#define MAX_ARRAY_DIMENSION 16 + +/* member information */ +typedef MEMBER_S { + + char *name; + int offset; /* offset from top of structure */ + int size; /* size in bytes of the member or of the bit array */ + int fbit; /* fist bit (-1) is not a bit field */ + int nbits; /* number of bits for this member */ + int value; /* for a enum member, the corresponding value_t */ + +} member_t; + +/* list to hold enum constant information */ +typedef ENUM_S { + struct enum_s *next; + char *name; + int value; + +} enum_t; + +/* list of macro symbols and there corresponding value_ts */ +typedef DEF_S { + struct def_s *next; + char *name; + char *val; + +} def_t; + + +typedef TYPE_S { + int type; /* type_t of type_t */ + ull idx; /* index to basetype_t or ctype_t */ + int size; /* size of this item */ + /* ... next fields are use internally */ + int typattr; /* base type_t qualifiers */ + int ref; /* level of reference */ + int fct; /* 1 if function pointer */ + int *idxlst; /* points to list of indexes if array */ + ull rtype; /* type_t a reference refers too */ +} type_t; + +#define ERRMSG(x...) 
\ +do { \ + fprintf(stderr, __FUNCTION__); \ + fprintf(stderr, ": "); \ + fprintf(stderr, x); \ +} while (0) + + +struct call_back *cb; + +#define GET_DOMAIN_ALL cb->get_domain_all +#define READMEM cb->readmem +#define GET_DIE_ATTR_TYPE cb->get_die_attr_type +#define GET_DIE_NAME cb->get_die_name +#define GET_DIE_OFFSET cb->get_die_offset +#define GET_DIE_LENGTH cb->get_die_length +#define GET_DIE_MEMBER_ALL cb->get_die_member_all +#define GET_DIE_NFIELDS_ALL cb->get_die_nfields_all +#define GET_SYMBOL_ADDR_ALL cb->get_symbol_addr_all +#define UPDATE_FILTER_INFO_RAW cb->update_filter_info_raw + +#endif /* _EXTENSION_EPPIC_H */ diff --git a/makedumpfile-R.pl b/makedumpfile-R.pl new file mode 100644 index 0000000..7879d45 --- /dev/null +++ b/makedumpfile-R.pl @@ -0,0 +1,202 @@ +#!/usr/bin/perl + +# makedumpfile-R.pl +# +# Copyright (C) 2007, 2008 NEC Corporation +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+ +$name_dumpfile = @ARGV[0]; +$TRUE = 1; +$FALSE = 0; +$MAKEDUMPFILE_SIGNATURE = "makedumpfile"; +$MAX_SIZE_MDF_HEADER = 4096; +$TYPE_FLAT_HEADER = 1; +$END_FLAG_FLAT_HEADER = -1; + +print "Start re-arranging dump data of flattened format to a dumpfile.\n"; +open(FILE_DUMPFILE, ">$name_dumpfile") || die "Cannot open $name_dumpfile.\n"; +binmode(FILE_DUMPFILE); + +$value_64bits = &is_64bits_system; + +if (&rearrange_dumpdata == $TRUE) { + printf "The dumpfile is saved to $name_dumpfile.\n"; + printf "Completed.\n"; +} else { + printf "Failed.\n"; +} +close(FILE_DUMPFILE); +# End + + +# Re-arrange dump data of flattened format from a standard input. +sub rearrange_dumpdata { + if (&read_start_flat_header != $TRUE) { + return $FALSE; + } + if ($value_64bits == $TRUE) { + $ret_seek = &seek_for_64bits_system(); + } else { + $ret_seek = &seek_for_32bits_system(); + } + $buf_size = &get_buf_size(); + while (($ret_seek == $TRUE) && (0 < $buf_size)) { + &read_buf_from_stdin($buf_size); + if (syswrite(FILE_DUMPFILE, $buf, $buf_size) != $buf_size) { + print "Cannot write. 
$buf_size\n"; + return $FALSE; + } + if ($value_64bits == $TRUE) { + $ret_seek = &seek_for_64bits_system(); + } else { + $ret_seek = &seek_for_32bits_system(); + } + $buf_size = &get_buf_size(); + } + if (($ret_seek != $END_FLAG_FLAT_HEADER) || ($buf_size != $END_FLAG_FLAT_HEADER)) { + print "Cannot get valid end header of flattened format.\n"; + print "ret_seek = $ret_seek, buf_size = $buf_size\n"; + return $FALSE; + } + return $TRUE; +} + +sub read_start_flat_header { + &read_buf_from_stdin($MAX_SIZE_MDF_HEADER); + if (index($buf, $MAKEDUMPFILE_SIGNATURE) != 0) { + print "It is not flattened format.\n"; + return $FALSE; + } + return $TRUE; +} + +sub seek_for_64bits_system { + my $value = 0; + my ($high, $low) = &read_64bits; + + $value = &convert_2values_to_1value($high, $low); + if ($value < 0) { + return $value; + } + if (seek(FILE_DUMPFILE, $value, 0) == 0) { + print "Cannot seek.\n"; + return $FALSE; + } + return $TRUE; +} + +sub seek_for_32bits_system { + my ($high, $low) = &read_64bits; + + # On 32bits system, a normal value cannot explain the offset of + # large file(4GB or larger). For solving this problem, BigInt + # module is used. But this module makes speed down. + use Math::BigInt; + local $value = Math::BigInt->new(1); + + if ($high < 0x80000000) { + $value->blsft(32); + $value->bmul($high); + $value->badd($low); + } else { + # Negative value + $low = ($low ^ 0xffffffff); + $high = ($high ^ 0xffffffff); + $value->blsft(32); + $value->bmul($high); + $value->badd($low); + $value->badd(1); + $value->bneg(); + } + if ($value < 0) { + return $value; + } + if (seek(FILE_DUMPFILE, $value, 0) == 0) { + print "Cannot seek.\n"; + return $FALSE; + } + return $TRUE; +} + +# Get buf_size of flattened data header. +sub get_buf_size { + my ($high, $low) = &read_64bits; + return &convert_2values_to_1value($high, $low); +} + +# Convert 2 values to 1 value. +# This function should be called only if a value isn't over the size +# of system value. 
+sub convert_2values_to_1value { + my ($high, $low) = (@_[0], @_[1]); + my $value = 0; + if ($high < 0x80000000) { + $value = $high * (1 << 32) + $low; + } else { + # Negative value + $low = ($low ^ 0xffffffff); + $high = ($high ^ 0xffffffff); + $value = (-1) * ($high * (1 << 32) + $low + 1); + } + return $value; +} + +# Get 64bits of dump data. +# This function returns 2 values because a value of 32bits system cannot +# explain 64bits. +sub read_64bits { + my ($high, $low) = (0, 0); + &read_buf_from_stdin(8); + + # Separate 2 values because hex() cannot support 64bits on 32bits system. + my ($value1, $value2) = unpack("H8 H8", $buf); + $value1 = hex($value1); + $value2 = hex($value2); + if (is_bigendian() == $TRUE) { + $low = $value1; + $high = $value2; + } else { + $low = $value2; + $high = $value1; + } + return ($high, $low); +} + +# Get dump data of flattened format from a standard input. +sub read_buf_from_stdin { + my $buf_size = @_[0]; + my $read_size = 0; + while ($read_size < $buf_size) { + $read_size += sysread(STDIN, $buf, $buf_size - $read_size, $read_size); + } +} + +# Check 64/32bits system. +sub is_64bits_system { + my $temp1 = 1 << 31; + my $temp2 = 1 << 33; + if ($temp1 < $temp2) { + return $TRUE; + } + return $FALSE; +} + +# Check big/little endian. 
+sub is_bigendian { + my $value = pack("l", 1234); + $value = unpack("n", $value); + if ($value == 1234) { + return $TRUE; + } + return $FALSE; +} + diff --git a/makedumpfile.8 b/makedumpfile.8 new file mode 100644 index 0000000..8482134 --- /dev/null +++ b/makedumpfile.8 @@ -0,0 +1,661 @@ +.TH MAKEDUMPFILE 8 "3 Jul 2018" "makedumpfile v1.6.4" "Linux System Administrator's Manual" +.SH NAME +makedumpfile \- make a small dumpfile of kdump +.SH SYNOPSIS +\fBmakedumpfile\fR [\fIOPTION\fR] [\-x \fIVMLINUX\fR|\-i \fIVMCOREINFO\fR] \fIVMCORE\fR \fIDUMPFILE\fR +.br +\fBmakedumpfile\fR \-F [\fIOPTION\fR] [\-x \fIVMLINUX\fR|\-i \fIVMCOREINFO\fR] \fIVMCORE\fR +.br +\fBmakedumpfile\fR [\fIOPTION\fR] \-x \fIVMLINUX\fR [\-\-config \fIFILTERCONFIGFILE\fR] [\-\-eppic \fIEPPICMACRO\fR] \fIVMCORE\fR \fIDUMPFILE\fR +.br +\fBmakedumpfile\fR \-R \fIDUMPFILE\fR +.br +\fBmakedumpfile\fR \-\-split [\fIOPTION\fR] [\-x \fIVMLINUX\fR|\-i \fIVMCOREINFO\fR] \fIVMCORE\fR \fIDUMPFILE1\fR \fIDUMPFILE2\fR [\fIDUMPFILE3\fR ..] +.br +\fBmakedumpfile\fR [\fIOPTION\fR] [\-x \fIVMLINUX\fR|\-i \fIVMCOREINFO\fR] \-\-num\-threads \fITHREADNUM\fR \fIVMCORE\fR \fIDUMPFILE\fR +.br +\fBmakedumpfile\fR \-\-reassemble \fIDUMPFILE1\fR \fIDUMPFILE2\fR [\fIDUMPFILE3\fR ..] \fIDUMPFILE\fR +.br +\fBmakedumpfile\fR \-g \fIVMCOREINFO\fR \-x \fIVMLINUX\fR +.br +\fBmakedumpfile\fR [\fIOPTION\fR] [\-\-xen-syms \fIXEN-SYMS\fR|\-\-xen-vmcoreinfo \fIVMCOREINFO\fR] \fIVMCORE\fR \fIDUMPFILE\fR +.br +\fBmakedumpfile\fR \-\-dump-dmesg [\-\-partial-dmesg] [\-x \fIVMLINUX\fR|\-i \fIVMCOREINFO\fR] \fIVMCORE\fR \fILOGFILE\fR +.br +\fBmakedumpfile\fR [\fIOPTION\fR] \-x \fIVMLINUX\fR \-\-diskset=\fIVMCORE1\fR \-\-diskset=\fIVMCORE2\fR [\-\-diskset=\fIVMCORE3\fR ..] 
\fIDUMPFILE\fR +.br +.B makedumpfile +\-h +.br +.B makedumpfile +\-v +.br +.SH DESCRIPTION +.PP +With kdump, the memory image of the first kernel (called "panicked kernel") can +be taken as /proc/vmcore while the second kernel (called "kdump kernel" or +"capture kernel") is running. This document represents /proc/vmcore as +\fIVMCORE\fR. makedumpfile makes a small \fIDUMPFILE\fR by compressing dump +data or by excluding unnecessary pages for analysis, or both. makedumpfile +needs the first kernel's debug information, so that it can distinguish +unnecessary pages by analyzing how the first kernel uses the memory. +The information can be taken from \fIVMLINUX\fR or \fIVMCOREINFO\fR. +.PP +makedumpfile can exclude the following types of pages while copying +\fIVMCORE\fR to \fIDUMPFILE\fR, and a user can choose which type of pages will +be excluded. +.br +.B \- Pages filled with zero +.br +.B \- Cache pages without private flag (non-private cache) +.br +.B \- Cache pages with private flag (private cache) +.br +.B \- User process data pages +.br +.B \- Free pages +.PP +makedumpfile provides two \fIDUMPFILE\fR formats (the ELF format and the +kdump\-compressed format). By default, makedumpfile makes a \fIDUMPFILE\fR in +the kdump\-compressed format. The kdump\-compressed format is readable only with +the crash utility, and it can be smaller than the ELF format because of the +compression support. The ELF format is readable with GDB and the crash utility. +If a user wants to use GDB, \fIDUMPFILE\fR format has to be explicitly +specified to be the ELF format. +.PP +Apart from the exclusion of unnecessary pages mentioned above, makedumpfile +allows user to filter out targeted kernel data. The filter config file can +be used to specify kernel/module symbols and its members that need to be +filtered out through the erase command syntax. makedumpfile reads the filter +config and builds the list of memory addresses and its sizes after processing +filter commands. 
The memory locations that require to be filtered out are +then poisoned with character 'X' (58 in Hex). Refer to +\fBmakedumpfile.conf(5)\fR for file format. +.PP +Eppic macros can also be used to specify kernel symbols and its members that +need to be filtered. Eppic provides C semantics including language constructs +such as conditional statements, logical and arithmetic operators, functions, +nested loops to traverse and erase kernel data. --eppic requires +\fBeppic_makedumpfile.so\fR and eppic library. \fBeppic_makedumpfile.so\fR +can be built from makedumpfile source. Refer to +\fBhttp://code.google.com/p/eppic/\fR to build eppic library \fBlibeppic.a\fR +and for more information on writing eppic macros. +.PP +To analyze the first kernel's memory usage, makedumpfile can refer to +\fIVMCOREINFO\fR instead of \fIVMLINUX\fR. \fIVMCOREINFO\fR contains the first +kernel's information (structure size, field offset, etc.), and \fIVMCOREINFO\fR +is small enough to be included into the second kernel's initrd. +.br +If the second kernel is running on its initrd without mounting a root file +system, makedumpfile cannot refer to \fIVMLINUX\fR because the second kernel's +initrd cannot include a large file like \fIVMLINUX\fR. To solve the problem, +makedumpfile makes \fIVMCOREINFO\fR beforehand, and it refers to +\fIVMCOREINFO\fR instead of \fIVMLINUX\fR while the second kernel is running. +.br +\fIVMCORE\fR has contained \fIVMCOREINFO\fR since linux-2.6.24, and a user does +not need to specify neither -x nor -i option. +.PP +If the second kernel is running on its initrd without mounting any file system, +a user needs to transport the dump data to a remote host. To transport the dump +data by SSH, makedumpfile outputs the dump data in the intermediate format (the +flattened format) to the standard output. By piping the output data to SSH, +a user can transport the dump data to a remote host. 
Note that analysis tools +(crash utility before version 5.1.2 or GDB) cannot read the flattened format +directly, so on a remote host the received data in the flattened format needs +to be rearranged to a readable \fIDUMPFILE\fR format by makedumpfile (or makedumpfile\-R.pl). +.PP +makedumpfile can read a \fIDUMPFILE\fR in the kdump-compressed format instead +of \fIVMCORE\fR and re-filter it. This feature is useful in situation that +users need to reduce the file size of \fIDUMPFILE\fR for sending it somewhere +by ftp/scp/etc. (If all of the page types, which are specified by a new dump_level, +are excluded from an original \fIDUMPFILE\fR already, a new \fIDUMPFILE\fR is the +same as an original \fIDUMPFILE\fR.) +.br +For example, makedumpfile can create a \fIDUMPFILE\fR of dump_level 31 from the +one of dump_level 3 like the following: +.br +.B Example: +.br +# makedumpfile \-c \-d 3 /proc/vmcore dumpfile.1 +.br +# makedumpfile \-c \-d 31 dumpfile.1 dumpfile.2 +.PP +makedumpfile can read \fIVMCORE\fR(s) in three kinds of sadump +formats: single partition format, diskset format and media backup +format, and can convert each of them into kdump-compressed format with +filtering and compression processing. Note that for \fIVMCORE\fR(s) +created by sadump, you always need to pass \fIVMLINUX\fR with -x +option. Also, to pass multiple \fIVMCORE\fRs created on diskset +configuration, you need to use --diskset option. + +.PP +.SH OPTIONS + +.TP +\fB\-c,\-l,\-p\fR +Compress dump data by each page using zlib for -c option, lzo for -l +option or snappy for -p option. +(-l option needs USELZO=on and -p option needs USESNAPPY=on when building) +.br +A user cannot specify this option with \-E option, because the ELF format does +not support compressed data. +.br +.B Example: +.br +# makedumpfile \-c \-d 31 \-x vmlinux /proc/vmcore dumpfile + +.TP +.BI \-d \ dump_level +Specify the type of unnecessary page for analysis. 
+.br +Pages of the specified type are not copied to \fIDUMPFILE\fR. The page type +marked in the following table is excluded. A user can specify multiple page +types by setting the sum of each page type for dump_level. The maximum of +dump_level is 31. Note that a dump_level for Xen dump filtering is 0 or 1 on +a machine other than x86_64. On a x86_64 machine, even 2 or bigger dump level +will be effective if you specify domain-0's \fIvmlinux\fR with \-x option. +Then the pages are excluded only from domain-0. +.br +If specifying multiple dump_levels with the delimiter ',', makedumpfile retries +to create a \fIDUMPFILE\fR by other dump_level when "No space on device" error +happens. For example, if dump_level is "11,31" and makedumpfile fails +by dump_level 11, makedumpfile retries it by dump_level 31. +.br +.B Example: +.br +# makedumpfile \-d 11 \-x vmlinux /proc/vmcore dumpfile +.br +# makedumpfile \-d 11,31 \-x vmlinux /proc/vmcore dumpfile +.br +.B Base level: +.br +dump_level consists of five bits, so there are five base levels to specify the type of unnecessary page. +.br + \fB 1\fR : Exclude the pages filled with zero. +.br + \fB 2\fR : Exclude the non-private cache pages. +.br + \fB 4\fR : Exclude all cache pages. +.br + \fB 8\fR : Exclude the user process data pages. +.br + \fB16\fR : Exclude the free pages. + +Here is the all combinations of the bits. 
+ + | |non- | | | + dump | zero |private|private| user | free + level | page |cache |cache | data | page +.br +\-\-\-\-\-\-\-+\-\-\-\-\-\-+\-\-\-\-\-\-\-+\-\-\-\-\-\-\-+\-\-\-\-\-\-+\-\-\-\-\-\- + 0 | | | | | + 1 | X | | | | + 2 | | X | | | + 3 | X | X | | | + 4 | | X | X | | + 5 | X | X | X | | + 6 | | X | X | | + 7 | X | X | X | | + 8 | | | | X | + 9 | X | | | X | + 10 | | X | | X | + 11 | X | X | | X | + 12 | | X | X | X | + 13 | X | X | X | X | + 14 | | X | X | X | + 15 | X | X | X | X | + 16 | | | | | X + 17 | X | | | | X + 18 | | X | | | X + 19 | X | X | | | X + 20 | | X | X | | X + 21 | X | X | X | | X + 22 | | X | X | | X + 23 | X | X | X | | X + 24 | | | | X | X + 25 | X | | | X | X + 26 | | X | | X | X + 27 | X | X | | X | X + 28 | | X | X | X | X + 29 | X | X | X | X | X + 30 | | X | X | X | X + 31 | X | X | X | X | X + + +.TP +\fB\-E\fR +Create \fIDUMPFILE\fR in the ELF format. +.br +This option cannot be specified with the -c, -l or -p options, because +the ELF format does not support compressed data. +.br +.B Example: +.br +# makedumpfile \-E \-d 31 \-x vmlinux /proc/vmcore dumpfile + +.TP +\fB\-f\fR +Force existing DUMPFILE to be overwritten and mem-usage to work with older +kernel as well. +.br +.B Example: +.br +# makedumpfile \-f \-d 31 \-x vmlinux /proc/vmcore dumpfile +.br +This command overwrites \fIDUMPFILE\fR even if it already exists. +.br +# makedumpfile \-f \-\-mem\-usage /proc/kcore +.br +Kernel version lesser than v4.11 will not work with \-\-mem\-usage +functionality until it has been patched with upstream commit 464920104bf7. +Therefore if you have patched your older kernel then use \-f. + +.TP +\fB\-x\fR \fIVMLINUX\fR +Specify the first kernel's \fIVMLINUX\fR with debug information to analyze the +first kernel's memory usage. +.br +This option is necessary if \fIVMCORE\fR does not contain \fIVMCOREINFO\fR, +[\-i \fIVMCOREINFO\fR] is not specified, and dump_level is 2 or more. 
+.br +The page size of the first kernel and the second kernel should match. +.br +.B Example: +.br +# makedumpfile \-d 31 \-x vmlinux /proc/vmcore dumpfile + +.TP +\fB\-i\fR \fIVMCOREINFO\fR +Specify \fIVMCOREINFO\fR instead of \fIVMLINUX\fR for analyzing the first kernel's memory usage. +.br +\fIVMCOREINFO\fR should be made beforehand by makedumpfile with \-g option, and +it contains the first kernel's information. +.br +This option is necessary if \fIVMCORE\fR does not contain \fIVMCOREINFO\fR, +[\-x \fIVMLINUX\fR] is not specified, and dump_level is 2 or more. +.br +.B Example: +.br +# makedumpfile \-d 31 \-i vmcoreinfo /proc/vmcore dumpfile + +.TP +\fB\-g\fR \fIVMCOREINFO\fR +Generate \fIVMCOREINFO\fR from the first kernel's \fIVMLINUX\fR with debug +information. +.br +\fIVMCOREINFO\fR must be generated on the system that is running the first +kernel. With \-i option, a user can specify \fIVMCOREINFO\fR generated on the +other system that is running the same first kernel. [\-x \fIVMLINUX\fR] must be +specified. +.br +.B Example: +.br +# makedumpfile \-g vmcoreinfo \-x vmlinux + +.TP +\fB\-\-config\fR \fIFILTERCONFIGFILE\fR +Used in conjunction with \-x \fIVMLINUX\fR option, to specify the filter +config file \fIFILTERCONFIGFILE\fR that contains erase commands to filter out +desired kernel data from vmcore while creating \fIDUMPFILE\fR. For filter +command syntax please refer to \fBmakedumpfile.conf(5)\fR. + +.TP +\fB\-\-eppic\fR \fIEPPICMACRO\fR +Used in conjunction with \-x \fIVMLINUX\fR option, to specify the eppic macro +file that contains filter rules or directory that contains eppic macro +files to filter out desired kernel data from vmcore while creating \fIDUMPFILE\fR. +When directory is specified, all the eppic macros in the directory are processed. + +.TP +\fB\-F\fR +Output the dump data in the flattened format to the standard output for +transporting the dump data by SSH. 
+.br +Analysis tools (crash utility before version 5.1.2 or GDB) cannot read the +flattened format directly. For analysis, the dump data in the flattened format +should be rearranged to a normal \fIDUMPFILE\fR (readable with analysis tools) +by \-R option. By which option is specified with \-F option, the format of the +rearranged \fIDUMPFILE\fR is fixed. +In other words, it is impossible to specify the \fIDUMPFILE\fR format when the +dump data is rearranged with \-R option. If specifying \-E option with \-F option, +the format of the rearranged \fIDUMPFILE\fR is the ELF format. Otherwise, it +is the kdump\-compressed format. All the messages are output to standard error +output by \-F option because standard output is used for the dump data. +.br +.B Example: +.br +# makedumpfile \-F \-c \-d 31 \-x vmlinux /proc/vmcore \e +.br +| ssh user@host "cat > dumpfile.tmp" +.br +# makedumpfile \-F \-c \-d 31 \-x vmlinux /proc/vmcore \e +.br +| ssh user@host "makedumpfile \-R dumpfile" +.br +# makedumpfile \-F \-E \-d 31 \-i vmcoreinfo /proc/vmcore \e +.br +| ssh user@host "makedumpfile \-R dumpfile" +.br +# makedumpfile \-F \-E \-\-xen-vmcoreinfo \fIVMCOREINFO\fR /proc/vmcore \e +.br +| ssh user@host "makedumpfile \-R dumpfile" + +.TP +\fB\-R\fR +Rearrange the dump data in the flattened format from the standard input to a +normal \fIDUMPFILE\fR (readable with analysis tools). +.br +.B Example: +.br +# makedumpfile \-R dumpfile < dumpfile.tmp +.br +# makedumpfile \-F \-d 31 \-x vmlinux /proc/vmcore \e +.br +| ssh user@host "makedumpfile \-R dumpfile" + +Instead of using \-R option, a perl script "makedumpfile\-R.pl" rearranges the +dump data in the flattened format to a normal \fIDUMPFILE\fR, too. The perl +script does not depend on architecture, and most systems have perl command. +Even if a remote host does not have makedumpfile, it is possible to rearrange +the dump data in the flattened format to a readable \fIDUMPFILE\fR on a remote +host by running this script. 
+.br
+.B Example:
+.br
+# makedumpfile \-F \-d 31 \-x vmlinux /proc/vmcore \e
+.br
+| ssh user@host "makedumpfile\-R.pl dumpfile"
+
+.TP
+\fB\-\-split\fR
+Split the dump data to multiple \fIDUMPFILE\fRs in parallel. If specifying
+\fIDUMPFILE\fRs on different storage devices, a device can share I/O load
+with other devices and it reduces time for saving the dump data. The file
+size of each \fIDUMPFILE\fR is smaller than the system memory size which
+is divided by the number of \fIDUMPFILE\fRs. This feature supports only
+the kdump\-compressed format.
+.br
+.B Example:
+.br
+# makedumpfile \-\-split \-d 31 \-x vmlinux /proc/vmcore dumpfile1 dumpfile2
+
+.TP
+\fB\-\-num\-threads\fR \fITHREADNUM\fR
+Using multiple threads to read and compress data of each page in parallel.
+And it will reduce the time for saving \fIDUMPFILE\fR.
+Note that if the usable cpu number is less than the thread number, it may
+lead to great performance degradation.
+This feature only supports creating \fIDUMPFILE\fR in kdump\-compressed
+format from \fIVMCORE\fR in kdump\-compressed format or elf format.
+.br
+.B Example:
+.br
+# makedumpfile \-d 31 \-\-num\-threads 4 /proc/vmcore dumpfile
+
+.TP
+\fB\-\-reassemble\fR
+Reassemble multiple \fIDUMPFILE\fRs, which are created by \-\-split option,
+into one \fIDUMPFILE\fR. dumpfile1 and dumpfile2 are reassembled into dumpfile
+on the following example.
+.br
+.B Example:
+.br
+# makedumpfile \-\-reassemble dumpfile1 dumpfile2 dumpfile
+
+.TP
+\fB\-b\fR \fI<order>\fR
+Cache 2^order pages in ram when generating \fIDUMPFILE\fR before writing to output.
+The default value is 4.
+
+.TP
+\fB\-\-cyclic\-buffer\fR \fIbuffer_size\fR
+Specify the buffer size in kilo bytes for bitmap data.
+Filtering processing will be divided into multi cycles to fix the memory consumption, +the number of cycles is represented as: + + num_of_cycles = system_memory / (\fIbuffer_size\fR * 1024 * bit_per_bytes * page_size ) + +The lesser number of cycles, the faster working speed is expected. +By default, \fIbuffer_size\fR will be calculated automatically depending on system memory +size, so ordinary users don't need to specify this option. + +.br +.B Example: +.br +# makedumpfile \-\-cyclic\-buffer 1024 \-d 31 \-x vmlinux /proc/vmcore dumpfile + +.TP +\fB\-\-splitblock\-size\fR \fIsplitblock_size\fR +Specify the splitblock size in kilo bytes for analysis with --split. +If --splitblock N is specified, difference of each splitted dumpfile size is at most N +kilo bytes. +.br +.B Example: +.br +# makedumpfile \-\-splitblock\-size 1024 \-d 31 \-x vmlinux \-\-split /proc/vmcore dumpfile1 dumpfile2 + +.TP + +\fB\-\-work\-dir\fR +Specify the working directory for the temporary bitmap file. +If this option isn't specified, the bitmap will be saved on memory. +Filtering processing has to do 2 pass scanning to fix the memory consumption, +but it can be avoided by using working directory on file system. +So if you specify this option, the filtering speed may be bit faster. + +.br +.B Example: +.br +# makedumpfile \-\-work\-dir /tmp \-d 31 \-x vmlinux /proc/vmcore dumpfile + +.TP +\fB\-\-non\-mmap\fR +Never use \fBmmap(2)\fR to read \fIVMCORE\fR even if it supports \fBmmap(2)\fR. +Generally, reading \fIVMCORE\fR with \fBmmap(2)\fR is faster than without it, +so ordinary users don't need to specify this option. +This option is mainly for debugging. +.br +.B Example: +.br +# makedumpfile \-\-non\-mmap \-d 31 \-x vmlinux /proc/vmcore dumpfile + +.TP +\fB\-\-xen-syms\fR \fIXEN-SYMS\fR +Specify the \fIXEN-SYMS\fR with debug information to analyze the xen's memory usage. +This option extracts the part of xen and domain-0. 
+.br +.B Example: +.br +# makedumpfile \-E \-\-xen-syms xen-syms /proc/vmcore dumpfile + +.TP +\fB\-\-xen-vmcoreinfo\fR \fIVMCOREINFO\fR +Specify \fIVMCOREINFO\fR instead of \fIXEN-SYMS\fR for analyzing the xen's memory usage. +.br +\fIVMCOREINFO\fR should be made beforehand by makedumpfile with \-g option, and +it contains the xen's information. +.br +.B Example: +.br +# makedumpfile \-E \-\-xen-vmcoreinfo \fIVMCOREINFO\fR /proc/vmcore dumpfile + +.TP +\fB\-X\fR +Exclude all the user domain pages from Xen kdump's \fIVMCORE\fR, and extracts the +part of xen and domain-0. If \fIVMCORE\fR contains \fIVMCOREINFO\fR for Xen, it is +not necessary to specify \fI\-\-xen-syms\fR and \fI\-\-xen-vmcoreinfo\fR. +.br +.B Example: +.br +# makedumpfile \-E \-X /proc/vmcore dumpfile + +.TP +\fB\-\-xen_phys_start\fR \fIxen_phys_start_address\fR +This option is only for x86_64. +Specify the \fIxen_phys_start_address\fR, if the xen code/data is relocatable +and \fIVMCORE\fR does not contain \fIxen_phys_start_address\fR in the CRASHINFO. +\fIxen_phys_start_address\fR can be taken from the line of "Hypervisor code +and data" in /proc/iomem. For example, specify 0xcee00000 as \fIxen_phys_start_address\fR +if /proc/iomem is the following: + ------------------------------------------------------- + # cat /proc/iomem + ... + cee00000-cfd99999 : Hypervisor code and data + ... + ------------------------------------------------------- + +.br +.B Example: +.br +# makedumpfile \-E \-X \-\-xen_phys_start 0xcee00000 /proc/vmcore dumpfile + +.TP +\fB\-\-message-level\fR \fImessage_level\fR +Specify the message types. +.br +Users can restrict outputs printed by specifying \fImessage_level\fR +with this option. The message type marked with an X in the following +table is printed. For example, according to the table, specifying 7 as +\fImessage_level\fR means progress indicator, common message, and error +message are printed, and this is a default value. 
Note that the maximum +value of \fImessage_level\fR is 31. +.br + + message | progress | common | error | debug | report + level | indicator| message | message | message | message +.br +\-\-\-\-\-\-\-\-\-+\-\-\-\-\-\-\-\-\-\-+\-\-\-\-\-\-\-\-\-+\-\-\-\-\-\-\-\-\-+\-\-\-\-\-\-\-\-\-+\-\-\-\-\-\-\-\-\- + 0 | | | | | + 1 | X | | | | + 2 | | X | | | + 3 | X | X | | | + 4 | | | X | | + 5 | X | | X | | + 6 | | X | X | | + * 7 | X | X | X | | + 8 | | | | X | + 9 | X | | | X | + 10 | | X | | X | + 11 | X | X | | X | + 12 | | | X | X | + 13 | X | | X | X | + 14 | | X | X | X | + 15 | X | X | X | X | + 16 | | | | | X + 17 | X | | | | X + 18 | | X | | | X + 19 | X | X | | | X + 20 | | | X | | X + 21 | X | | X | | X + 22 | | X | X | | X + 23 | X | X | X | | X + 24 | | | | X | X + 25 | X | | | X | X + 26 | | X | | X | X + 27 | X | X | | X | X + 28 | | | X | X | X + 29 | X | | X | X | X + 30 | | X | X | X | X + 31 | X | X | X | X | X + +.TP +\fB\-\-vtop\fR \fIvirtual_address\fR +This option is useful, when user debugs the translation problem +of virtual address. If specifing \fIvirtual_address\fR, its physical +address is printed. It makes debugging easy by comparing the +output of this option with the one of "vtop" subcommand of the +crash utility. +"--vtop" option only prints the translation output, and it does +not affect the dumpfile creation. + +.TP +\fB\-\-dump-dmesg\fR +This option overrides the normal behavior of makedumpfile. Instead of +compressing and filtering a \fIVMCORE\fR to make it smaller, it simply +extracts the dmesg log from a \fIVMCORE\fR and writes it to the specified +\fILOGFILE\fR. If a \fIVMCORE\fR does not contain \fIVMCOREINFO\fR for dmesg, +it is necessary to specfiy [\-x \fIVMLINUX\fR] or [\-i \fIVMCOREINFO\fR]. 
+
+.br
+.B Example:
+.br
+# makedumpfile \-\-dump-dmesg /proc/vmcore dmesgfile
+.br
+# makedumpfile \-\-dump-dmesg -x vmlinux /proc/vmcore dmesgfile
+.br
+
+
+.TP
+\fB\-\-partial-dmesg\fR
+This option will make --dump-dmesg extract only dmesg logs since that buffer was
+last cleared on the crashed kernel, through "dmesg --clear" for example.
+
+
+.TP
+\fB\-\-mem-usage\fR
+This option is currently supported on x86_64, arm64, ppc64 and s390x.
+This option is used to show the page numbers of current system in different
+use. It should be executed in 1st kernel. By the help of this, user can know
+how many pages are dumpable when different dump_level is specified. It analyzes
+the 'System Ram' and 'kernel text' program segment of /proc/kcore excluding
+the crashkernel range, then calculates the page number of different kind per
+vmcoreinfo. So currently /proc/kcore needs to be specified explicitly.
+
+.br
+.B Example:
+.br
+# makedumpfile \-\-mem-usage /proc/kcore
+.br
+
+
+.TP
+\fB\-\-diskset=VMCORE\fR
+Specify multiple \fIVMCORE\fRs created on sadump diskset configuration
+the same number of times as the number of \fIVMCORE\fRs in increasing
+order from left to right. \fIVMCORE\fRs are assembled into a single
+\fIDUMPFILE\fR.
+
+.br
+.B Example:
+.br
+# makedumpfile \-x vmlinux \-\-diskset=vmcore1 \-\-diskset=vmcore2 dumpfile
+
+.TP
+\fB\-D\fR
+Print debugging message.
+
+.TP
+\fB\-h (\-\-help)\fR
+Show help message and LZO/snappy support status (enabled/disabled).
+
+.TP
+\fB\-v\fR
+Show the version of makedumpfile.
+
+.SH ENVIRONMENT VARIABLES
+
+.TP 8
+.B TMPDIR
+This environment variable is used in 1st kernel environment for a temporary memory bitmap file.
+If your machine has a lot of memory and you use small tmpfs on /tmp, makedumpfile
+can fail for lack of memory because makedumpfile makes a very large temporary
+memory bitmap file in this case. To avoid this failure, you should specify
+--work-dir option to use file system on storage for the bitmap file.
+ +.SH DIAGNOSTICS +makedumpfile exits with the following value. +.TP +\fB0\fR : makedumpfile succeeded. +.TP +\fB1\fR : makedumpfile failed without the following reasons. +.TP +\fB2\fR : makedumpfile failed due to the different version between \fIVMLINUX\fR and \fIVMCORE\fR. + +.SH AUTHORS +.PP +Written by Masaki Tachibana, and Ken'ichi Ohmichi. + +.SH SEE ALSO +.PP +crash(8), gdb(1), kexec(8), makedumpfile.conf(5) + diff --git a/makedumpfile.c b/makedumpfile.c new file mode 100644 index 0000000..1ed3d61 --- /dev/null +++ b/makedumpfile.c @@ -0,0 +1,11607 @@ +/* + * makedumpfile.c + * + * Copyright (C) 2006, 2007, 2008, 2009, 2011 NEC Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ +#include "makedumpfile.h" +#include "print_info.h" +#include "dwarf_info.h" +#include "elf_info.h" +#include "erase_info.h" +#include "sadump_info.h" +#include "cache.h" +#include <stddef.h> +#include <ctype.h> +#include <sys/time.h> +#include <limits.h> +#include <assert.h> +#include <zlib.h> + +struct symbol_table symbol_table; +struct size_table size_table; +struct offset_table offset_table; +struct array_table array_table; +struct number_table number_table; +struct srcfile_table srcfile_table; +struct save_control sc; + +struct vm_table vt = { 0 }; +struct DumpInfo *info = NULL; +struct SplitBlock *splitblock = NULL; +struct vmap_pfns *gvmem_pfns; +int nr_gvmem_pfns; +extern int find_vmemmap(); + +char filename_stdout[] = FILENAME_STDOUT; + +/* Cache statistics */ +static unsigned long long cache_hit; +static unsigned long long cache_miss; + +static void first_cycle(mdf_pfn_t start, mdf_pfn_t max, struct cycle *cycle) +{ + cycle->start_pfn = round(start, info->pfn_cyclic); + cycle->end_pfn = cycle->start_pfn + info->pfn_cyclic; + + if (cycle->end_pfn > max) + cycle->end_pfn = max; + + cycle->exclude_pfn_start = 0; + cycle->exclude_pfn_end = 0; +} + +static void update_cycle(mdf_pfn_t max, struct cycle *cycle) +{ + cycle->start_pfn= cycle->end_pfn; + cycle->end_pfn= cycle->start_pfn + info->pfn_cyclic; + + if (cycle->end_pfn > max) + cycle->end_pfn = max; +} + +static int end_cycle(mdf_pfn_t max, struct cycle *cycle) +{ + return (cycle->start_pfn >= max)?TRUE:FALSE; +} + +#define for_each_cycle(start, max, C) \ + for (first_cycle(start, max, C); !end_cycle(max, C); \ + update_cycle(max, C)) + +/* + * The numbers of the excluded pages + */ +mdf_pfn_t pfn_zero; +mdf_pfn_t pfn_memhole; +mdf_pfn_t pfn_cache; +mdf_pfn_t pfn_cache_private; +mdf_pfn_t pfn_user; +mdf_pfn_t pfn_free; +mdf_pfn_t pfn_hwpoison; + +mdf_pfn_t num_dumped; + +int retcd = FAILED; /* return code */ + +#define INITIALIZE_LONG_TABLE(table, value) \ +do { \ + size_member = sizeof(long); \ + 
num_member = sizeof(table) / size_member; \ + ptr_long_table = (long *)&table; \ + for (i = 0; i < num_member; i++, ptr_long_table++) \ + *ptr_long_table = value; \ +} while (0) + +static void setup_page_is_buddy(void); + +void +initialize_tables(void) +{ + int i, size_member, num_member; + unsigned long long *ptr_symtable; + long *ptr_long_table; + + /* + * Initialize the symbol table. + */ + size_member = sizeof(symbol_table.mem_map); + num_member = sizeof(symbol_table) / size_member; + + ptr_symtable = (unsigned long long *)&symbol_table; + + for (i = 0; i < num_member; i++, ptr_symtable++) + *ptr_symtable = NOT_FOUND_SYMBOL; + + INITIALIZE_LONG_TABLE(size_table, NOT_FOUND_STRUCTURE); + INITIALIZE_LONG_TABLE(offset_table, NOT_FOUND_STRUCTURE); + INITIALIZE_LONG_TABLE(array_table, NOT_FOUND_STRUCTURE); + INITIALIZE_LONG_TABLE(number_table, NOT_FOUND_NUMBER); +} + +/* + * Translate a domain-0's physical address to machine address. + */ +unsigned long long +ptom_xen(unsigned long long paddr) +{ + unsigned long mfn; + unsigned long long maddr; + mdf_pfn_t pfn; + unsigned long long mfn_idx, frame_idx; + + pfn = paddr_to_pfn(paddr); + mfn_idx = pfn / MFNS_PER_FRAME; + frame_idx = pfn % MFNS_PER_FRAME; + + if (mfn_idx >= info->p2m_frames) { + ERRMSG("Invalid mfn_idx(%llu).\n", mfn_idx); + return NOT_PADDR; + } + maddr = pfn_to_paddr(info->p2m_mfn_frame_list[mfn_idx]) + + sizeof(unsigned long) * frame_idx; + if (!readmem(PADDR, maddr, &mfn, sizeof(mfn))) { + ERRMSG("Can't get mfn.\n"); + return NOT_PADDR; + } + maddr = pfn_to_paddr(mfn); + maddr |= PAGEOFFSET(paddr); + + return maddr; +} + +/* + * Get the number of the page descriptors from the ELF info. 
+ */ +int +get_max_mapnr(void) +{ + unsigned long long max_paddr; + + if (info->flag_refiltering) { + if (info->dh_memory->header_version >= 6) + info->max_mapnr = info->kh_memory->max_mapnr_64; + else + info->max_mapnr = info->dh_memory->max_mapnr; + return TRUE; + } + + if (info->flag_sadump) { + info->max_mapnr = sadump_get_max_mapnr(); + return TRUE; + } + + max_paddr = get_max_paddr(); + info->max_mapnr = paddr_to_pfn(roundup(max_paddr, PAGESIZE())); + + DEBUG_MSG("\n"); + DEBUG_MSG("max_mapnr : %llx\n", info->max_mapnr); + + return TRUE; +} + +/* + * Get the number of the page descriptors for Xen. + */ +int +get_dom0_mapnr() +{ + unsigned long max_pfn; + + if (SYMBOL(max_pfn) != NOT_FOUND_SYMBOL) { + if (!readmem(VADDR, SYMBOL(max_pfn), &max_pfn, sizeof max_pfn)) { + ERRMSG("Can't read domain-0 max_pfn.\n"); + return FALSE; + } + + info->dom0_mapnr = max_pfn; + } else if (info->p2m_frames) { + unsigned long mfns[MFNS_PER_FRAME]; + unsigned long mfn_idx = info->p2m_frames - 1; + unsigned long long maddr; + unsigned i; + + maddr = pfn_to_paddr(info->p2m_mfn_frame_list[mfn_idx]); + if (!readmem(PADDR, maddr, &mfns, sizeof(mfns))) { + ERRMSG("Can't read %ld domain-0 mfns at 0x%llu\n", + (long)MFNS_PER_FRAME, maddr); + return FALSE; + } + + for (i = 0; i < MFNS_PER_FRAME; ++i) + if (!mfns[i]) + break; + + info->dom0_mapnr = mfn_idx * MFNS_PER_FRAME + i; + } else { + /* dom0_mapnr is unavailable, which may be non-critical */ + return TRUE; + } + + DEBUG_MSG("domain-0 pfn : %llx\n", info->dom0_mapnr); + return TRUE; +} + +int +is_in_same_page(unsigned long vaddr1, unsigned long vaddr2) +{ + if (round(vaddr1, info->page_size) == round(vaddr2, info->page_size)) + return TRUE; + + return FALSE; +} + +static inline int +isHugetlb(unsigned long dtor) +{ + return ((NUMBER(HUGETLB_PAGE_DTOR) != NOT_FOUND_NUMBER) + && (NUMBER(HUGETLB_PAGE_DTOR) == dtor)) + || ((SYMBOL(free_huge_page) != NOT_FOUND_SYMBOL) + && (SYMBOL(free_huge_page) == dtor)); +} + +static int 
+is_cache_page(unsigned long flags) +{ + if (isLRU(flags)) + return TRUE; + + /* PG_swapcache is valid only if: + * a. PG_swapbacked bit is set, or + * b. PG_swapbacked did not exist (kernels before 4.10-rc1). + */ + if ((NUMBER(PG_swapbacked) == NOT_FOUND_NUMBER || isSwapBacked(flags)) + && isSwapCache(flags)) + return TRUE; + + return FALSE; +} + +static inline unsigned long +calculate_len_buf_out(long page_size) +{ + unsigned long len_buf_out_zlib, len_buf_out_lzo, len_buf_out_snappy; + unsigned long len_buf_out; + + len_buf_out_zlib = len_buf_out_lzo = len_buf_out_snappy = 0; + +#ifdef USELZO + len_buf_out_lzo = page_size + page_size / 16 + 64 + 3; +#endif + +#ifdef USESNAPPY + len_buf_out_snappy = snappy_max_compressed_length(page_size); +#endif + + len_buf_out_zlib = compressBound(page_size); + + len_buf_out = MAX(len_buf_out_zlib, + MAX(len_buf_out_lzo, + len_buf_out_snappy)); + + return len_buf_out; +} + +#define BITMAP_SECT_LEN 4096 +static inline int is_dumpable(struct dump_bitmap *, mdf_pfn_t, struct cycle *cycle); +unsigned long +pfn_to_pos(mdf_pfn_t pfn) +{ + unsigned long desc_pos; + mdf_pfn_t i; + + desc_pos = info->valid_pages[pfn / BITMAP_SECT_LEN]; + for (i = round(pfn, BITMAP_SECT_LEN); i < pfn; i++) + if (is_dumpable(info->bitmap_memory, i, NULL)) + desc_pos++; + + return desc_pos; +} + +unsigned long +pfn_to_pos_parallel(mdf_pfn_t pfn, struct dump_bitmap* bitmap_memory_parallel) +{ + unsigned long desc_pos; + mdf_pfn_t i; + + desc_pos = info->valid_pages[pfn / BITMAP_SECT_LEN]; + for (i = round(pfn, BITMAP_SECT_LEN); i < pfn; i++) + if (is_dumpable(bitmap_memory_parallel, i, NULL)) + desc_pos++; + + return desc_pos; +} + +int +read_page_desc(unsigned long long paddr, page_desc_t *pd) +{ + struct disk_dump_header *dh; + unsigned long desc_pos; + mdf_pfn_t pfn; + off_t offset; + + /* + * Find page descriptor + */ + dh = info->dh_memory; + offset + = (DISKDUMP_HEADER_BLOCKS + dh->sub_hdr_size + dh->bitmap_blocks) + * dh->block_size; + pfn = 
paddr_to_pfn(paddr); + desc_pos = pfn_to_pos(pfn); + offset += (off_t)desc_pos * sizeof(page_desc_t); + if (lseek(info->fd_memory, offset, SEEK_SET) < 0) { + ERRMSG("Can't seek %s. %s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + + /* + * Read page descriptor + */ + if (read(info->fd_memory, pd, sizeof(*pd)) != sizeof(*pd)) { + ERRMSG("Can't read %s. %s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + + /* + * Sanity check + */ + if (pd->size > dh->block_size) + return FALSE; + + return TRUE; +} + +int +read_page_desc_parallel(int fd_memory, unsigned long long paddr, + page_desc_t *pd, + struct dump_bitmap* bitmap_memory_parallel) +{ + struct disk_dump_header *dh; + unsigned long desc_pos; + mdf_pfn_t pfn; + off_t offset; + + /* + * Find page descriptor + */ + dh = info->dh_memory; + offset + = (DISKDUMP_HEADER_BLOCKS + dh->sub_hdr_size + dh->bitmap_blocks) + * dh->block_size; + pfn = paddr_to_pfn(paddr); + desc_pos = pfn_to_pos_parallel(pfn, bitmap_memory_parallel); + offset += (off_t)desc_pos * sizeof(page_desc_t); + if (lseek(fd_memory, offset, SEEK_SET) < 0) { + ERRMSG("Can't seek %s. %s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + + /* + * Read page descriptor + */ + if (read(fd_memory, pd, sizeof(*pd)) != sizeof(*pd)) { + ERRMSG("Can't read %s. %s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + + /* + * Sanity check + */ + if (pd->size > dh->block_size) + return FALSE; + + return TRUE; +} + +static void +unmap_cache(struct cache_entry *entry) +{ + munmap(entry->bufptr, entry->buflen); +} + +static int +update_mmap_range(off_t offset, int initial) { + off_t start_offset, end_offset; + off_t map_size; + off_t max_offset = get_max_file_offset(); + off_t pt_load_end = offset_to_pt_load_end(offset); + + /* + * offset for mmap() must be page aligned. 
+ */ + start_offset = roundup(offset, info->page_size); + end_offset = MIN(max_offset, round(pt_load_end, info->page_size)); + + if (!pt_load_end || (end_offset - start_offset) <= 0) + return FALSE; + + map_size = MIN(end_offset - start_offset, info->mmap_region_size); + + info->mmap_buf = mmap(NULL, map_size, PROT_READ, MAP_PRIVATE, + info->fd_memory, start_offset); + + if (info->mmap_buf == MAP_FAILED) { + if (!initial) + DEBUG_MSG("Can't map [%llx-%llx] with mmap()\n %s", + (ulonglong)start_offset, + (ulonglong)(start_offset + map_size), + strerror(errno)); + return FALSE; + } + + info->mmap_start_offset = start_offset; + info->mmap_end_offset = start_offset + map_size; + + return TRUE; +} + +static int +update_mmap_range_parallel(int fd_memory, off_t offset, + struct mmap_cache *mmap_cache) +{ + off_t start_offset, end_offset; + off_t map_size; + off_t max_offset = get_max_file_offset(); + off_t pt_load_end = offset_to_pt_load_end(offset); + + /* + * mmap_buf must be cleaned + */ + if (mmap_cache->mmap_buf != MAP_FAILED) + munmap(mmap_cache->mmap_buf, mmap_cache->mmap_end_offset + - mmap_cache->mmap_start_offset); + + /* + * offset for mmap() must be page aligned. 
+ */ + start_offset = roundup(offset, info->page_size); + end_offset = MIN(max_offset, round(pt_load_end, info->page_size)); + + if (!pt_load_end || (end_offset - start_offset) <= 0) + return FALSE; + + map_size = MIN(end_offset - start_offset, info->mmap_region_size); + + mmap_cache->mmap_buf = mmap(NULL, map_size, PROT_READ, MAP_PRIVATE, + fd_memory, start_offset); + + if (mmap_cache->mmap_buf == MAP_FAILED) { + return FALSE; + } + + mmap_cache->mmap_start_offset = start_offset; + mmap_cache->mmap_end_offset = start_offset + map_size; + + return TRUE; +} + +static int +is_mapped_with_mmap(off_t offset) { + + if (info->flag_usemmap == MMAP_ENABLE + && offset >= info->mmap_start_offset + && offset < info->mmap_end_offset) + return TRUE; + else + return FALSE; +} + +static int +is_mapped_with_mmap_parallel(off_t offset, struct mmap_cache *mmap_cache) { + if (offset >= mmap_cache->mmap_start_offset + && offset < mmap_cache->mmap_end_offset) + return TRUE; + else + return FALSE; +} + +int +initialize_mmap(void) { + unsigned long long phys_start; + info->mmap_region_size = MAP_REGION; + info->mmap_buf = MAP_FAILED; + + get_pt_load(0, &phys_start, NULL, NULL, NULL); + if (!update_mmap_range(paddr_to_offset(phys_start), 1)) + return FALSE; + + return TRUE; +} + +static char * +mappage_elf(unsigned long long paddr) +{ + off_t offset, offset2; + + if (info->flag_usemmap != MMAP_ENABLE) + return NULL; + + offset = paddr_to_offset(paddr); + if (!offset || page_is_fractional(offset)) + return NULL; + + offset2 = paddr_to_offset(paddr + info->page_size); + if (!offset2) + return NULL; + + if (offset2 - offset != info->page_size) + return NULL; + + if (!is_mapped_with_mmap(offset) && + !update_mmap_range(offset, 0)) { + ERRMSG("Can't read the dump memory(%s) with mmap().\n", + info->name_memory); + + ERRMSG("This kernel might have some problems about mmap().\n"); + ERRMSG("read() will be used instead of mmap() from now.\n"); + + /* + * Fall back to read(). 
+ */ + info->flag_usemmap = MMAP_DISABLE; + return NULL; + } + + if (offset < info->mmap_start_offset || + offset + info->page_size > info->mmap_end_offset) + return NULL; + + return info->mmap_buf + (offset - info->mmap_start_offset); +} + +static char * +mappage_elf_parallel(int fd_memory, unsigned long long paddr, + struct mmap_cache *mmap_cache) +{ + off_t offset, offset2; + int flag_usemmap; + + pthread_rwlock_rdlock(&info->usemmap_rwlock); + flag_usemmap = info->flag_usemmap; + pthread_rwlock_unlock(&info->usemmap_rwlock); + if (flag_usemmap != MMAP_ENABLE) + return NULL; + + offset = paddr_to_offset(paddr); + if (!offset || page_is_fractional(offset)) + return NULL; + + offset2 = paddr_to_offset(paddr + info->page_size - 1); + if (!offset2) + return NULL; + + if (offset2 - offset != info->page_size - 1) + return NULL; + + if (!is_mapped_with_mmap_parallel(offset, mmap_cache) && + !update_mmap_range_parallel(fd_memory, offset, mmap_cache)) { + ERRMSG("Can't read the dump memory(%s) with mmap().\n", + info->name_memory); + + ERRMSG("This kernel might have some problems about mmap().\n"); + ERRMSG("read() will be used instead of mmap() from now.\n"); + + /* + * Fall back to read(). + */ + pthread_rwlock_wrlock(&info->usemmap_rwlock); + info->flag_usemmap = MMAP_DISABLE; + pthread_rwlock_unlock(&info->usemmap_rwlock); + return NULL; + } + + if (offset < mmap_cache->mmap_start_offset || + offset + info->page_size > mmap_cache->mmap_end_offset) + return NULL; + + return mmap_cache->mmap_buf + (offset - mmap_cache->mmap_start_offset); +} + +static int +read_from_vmcore(off_t offset, void *bufptr, unsigned long size) +{ + const off_t failed = (off_t)-1; + + if (lseek(info->fd_memory, offset, SEEK_SET) == failed) { + ERRMSG("Can't seek the dump memory(%s). (offset: %llx) %s\n", + info->name_memory, (unsigned long long)offset, strerror(errno)); + return FALSE; + } + + if (read(info->fd_memory, bufptr, size) != size) { + ERRMSG("Can't read the dump memory(%s). 
%s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + + return TRUE; +} + +static int +read_from_vmcore_parallel(int fd_memory, off_t offset, void *bufptr, + unsigned long size) +{ + const off_t failed = (off_t)-1; + + if (lseek(fd_memory, offset, SEEK_SET) == failed) { + ERRMSG("Can't seek the dump memory(%s). (offset: %llx) %s\n", + info->name_memory, (unsigned long long)offset, strerror(errno)); + return FALSE; + } + + if (read(fd_memory, bufptr, size) != size) { + ERRMSG("Can't read the dump memory(%s). %s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + + return TRUE; +} + +/* + * This function is specific for reading page from ELF. + * + * If reading the separated page on different PT_LOAD segments, + * this function gets the page data from both segments. This is + * worthy of ia64 /proc/vmcore. In ia64 /proc/vmcore, region 5 + * segment is overlapping to region 7 segment. The following is + * example (page_size is 16KBytes): + * + * region | paddr | memsz + * --------+--------------------+-------------------- + * 5 | 0x0000000004000000 | 0x0000000000638ce0 + * 7 | 0x0000000004000000 | 0x0000000000db3000 + * + * In the above example, the last page of region 5 is 0x4638000 + * and the segment does not contain complete data of this page. + * Then this function gets the data of 0x4638000 - 0x4638ce0 + * from region 5, and gets the remaining data from region 7. 
+ */ +static int +readpage_elf(unsigned long long paddr, void *bufptr) +{ + int idx; + off_t offset, size; + void *p, *endp; + unsigned long long phys_start, phys_end; + + p = bufptr; + endp = p + info->page_size; + while (p < endp) { + idx = closest_pt_load(paddr, endp - p); + if (idx < 0) + break; + + get_pt_load_extents(idx, &phys_start, &phys_end, &offset, &size); + if (phys_start > paddr) { + memset(p, 0, phys_start - paddr); + p += phys_start - paddr; + paddr = phys_start; + } + + offset += paddr - phys_start; + if (size > paddr - phys_start) { + size -= paddr - phys_start; + if (size > endp - p) + size = endp - p; + if (!read_from_vmcore(offset, p, size)) { + ERRMSG("Can't read the dump memory(%s).\n", + info->name_memory); + return FALSE; + } + p += size; + paddr += size; + } + if (p < endp) { + size = phys_end - paddr; + if (size > endp - p) + size = endp - p; + memset(p, 0, size); + p += size; + paddr += size; + } + } + + if (p == bufptr) { + ERRMSG("Attempt to read non-existent page at 0x%llx.\n", + paddr); + return FALSE; + } else if (p < endp) + memset(p, 0, endp - p); + + return TRUE; +} + +static int +readpage_elf_parallel(int fd_memory, unsigned long long paddr, void *bufptr) +{ + int idx; + off_t offset, size; + void *p, *endp; + unsigned long long phys_start, phys_end; + + p = bufptr; + endp = p + info->page_size; + while (p < endp) { + idx = closest_pt_load(paddr, endp - p); + if (idx < 0) + break; + + get_pt_load_extents(idx, &phys_start, &phys_end, &offset, &size); + if (phys_start > paddr) { + memset(p, 0, phys_start - paddr); + p += phys_start - paddr; + paddr = phys_start; + } + + offset += paddr - phys_start; + if (size > paddr - phys_start) { + size -= paddr - phys_start; + if (size > endp - p) + size = endp - p; + if (!read_from_vmcore_parallel(fd_memory, offset, p, + size)) { + ERRMSG("Can't read the dump memory(%s).\n", + info->name_memory); + return FALSE; + } + p += size; + paddr += size; + } + if (p < endp) { + size = phys_end - 
paddr; + if (size > endp - p) + size = endp - p; + memset(p, 0, size); + p += size; + paddr += size; + } + } + + if (p == bufptr) { + ERRMSG("Attempt to read non-existent page at 0x%llx.\n", + paddr); + return FALSE; + } else if (p < endp) + memset(p, 0, endp - p); + + return TRUE; +} + +static int +readpage_kdump_compressed(unsigned long long paddr, void *bufptr) +{ + page_desc_t pd; + char buf[info->page_size], *rdbuf; + int ret; + unsigned long retlen; + + if (!is_dumpable(info->bitmap_memory, paddr_to_pfn(paddr), NULL)) { + ERRMSG("pfn(%llx) is excluded from %s.\n", + paddr_to_pfn(paddr), info->name_memory); + return FALSE; + } + + if (!read_page_desc(paddr, &pd)) { + ERRMSG("Can't read page_desc: %llx\n", paddr); + return FALSE; + } + + if (lseek(info->fd_memory, pd.offset, SEEK_SET) < 0) { + ERRMSG("Can't seek %s. %s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + + /* + * Read page data + */ + rdbuf = pd.flags & (DUMP_DH_COMPRESSED_ZLIB | DUMP_DH_COMPRESSED_LZO | + DUMP_DH_COMPRESSED_SNAPPY) ? buf : bufptr; + if (read(info->fd_memory, rdbuf, pd.size) != pd.size) { + ERRMSG("Can't read %s. 
%s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + + if (pd.flags & DUMP_DH_COMPRESSED_ZLIB) { + retlen = info->page_size; + ret = uncompress((unsigned char *)bufptr, &retlen, + (unsigned char *)buf, pd.size); + if ((ret != Z_OK) || (retlen != info->page_size)) { + ERRMSG("Uncompress failed: %d\n", ret); + return FALSE; + } +#ifdef USELZO + } else if (info->flag_lzo_support + && (pd.flags & DUMP_DH_COMPRESSED_LZO)) { + retlen = info->page_size; + ret = lzo1x_decompress_safe((unsigned char *)buf, pd.size, + (unsigned char *)bufptr, &retlen, + LZO1X_MEM_DECOMPRESS); + if ((ret != LZO_E_OK) || (retlen != info->page_size)) { + ERRMSG("Uncompress failed: %d\n", ret); + return FALSE; + } +#endif +#ifdef USESNAPPY + } else if ((pd.flags & DUMP_DH_COMPRESSED_SNAPPY)) { + + ret = snappy_uncompressed_length(buf, pd.size, (size_t *)&retlen); + if (ret != SNAPPY_OK) { + ERRMSG("Uncompress failed: %d\n", ret); + return FALSE; + } + + ret = snappy_uncompress(buf, pd.size, bufptr, (size_t *)&retlen); + if ((ret != SNAPPY_OK) || (retlen != info->page_size)) { + ERRMSG("Uncompress failed: %d\n", ret); + return FALSE; + } +#endif + } + + return TRUE; +} + +static int +readpage_kdump_compressed_parallel(int fd_memory, unsigned long long paddr, + void *bufptr, + struct dump_bitmap* bitmap_memory_parallel) +{ + page_desc_t pd; + char buf[info->page_size], *rdbuf; + int ret; + unsigned long retlen; + + if (!is_dumpable(bitmap_memory_parallel, paddr_to_pfn(paddr), NULL)) { + ERRMSG("pfn(%llx) is excluded from %s.\n", + paddr_to_pfn(paddr), info->name_memory); + return FALSE; + } + + if (!read_page_desc_parallel(fd_memory, paddr, &pd, + bitmap_memory_parallel)) { + ERRMSG("Can't read page_desc: %llx\n", paddr); + return FALSE; + } + + if (lseek(fd_memory, pd.offset, SEEK_SET) < 0) { + ERRMSG("Can't seek %s. 
%s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + + /* + * Read page data + */ + rdbuf = pd.flags & (DUMP_DH_COMPRESSED_ZLIB | DUMP_DH_COMPRESSED_LZO | + DUMP_DH_COMPRESSED_SNAPPY) ? buf : bufptr; + if (read(fd_memory, rdbuf, pd.size) != pd.size) { + ERRMSG("Can't read %s. %s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + + if (pd.flags & DUMP_DH_COMPRESSED_ZLIB) { + retlen = info->page_size; + ret = uncompress((unsigned char *)bufptr, &retlen, + (unsigned char *)buf, pd.size); + if ((ret != Z_OK) || (retlen != info->page_size)) { + ERRMSG("Uncompress failed: %d\n", ret); + return FALSE; + } +#ifdef USELZO + } else if (info->flag_lzo_support + && (pd.flags & DUMP_DH_COMPRESSED_LZO)) { + retlen = info->page_size; + ret = lzo1x_decompress_safe((unsigned char *)buf, pd.size, + (unsigned char *)bufptr, &retlen, + LZO1X_MEM_DECOMPRESS); + if ((ret != LZO_E_OK) || (retlen != info->page_size)) { + ERRMSG("Uncompress failed: %d\n", ret); + return FALSE; + } +#endif +#ifdef USESNAPPY + } else if ((pd.flags & DUMP_DH_COMPRESSED_SNAPPY)) { + + ret = snappy_uncompressed_length(buf, pd.size, (size_t *)&retlen); + if (ret != SNAPPY_OK) { + ERRMSG("Uncompress failed: %d\n", ret); + return FALSE; + } + + ret = snappy_uncompress(buf, pd.size, bufptr, (size_t *)&retlen); + if ((ret != SNAPPY_OK) || (retlen != info->page_size)) { + ERRMSG("Uncompress failed: %d\n", ret); + return FALSE; + } +#endif + } + + return TRUE; +} + +int +readmem(int type_addr, unsigned long long addr, void *bufptr, size_t size) +{ + size_t read_size, size_orig = size; + unsigned long long paddr; + unsigned long long pgaddr; + void *pgbuf; + struct cache_entry *cached; + +next_page: + switch (type_addr) { + case VADDR: + if ((paddr = vaddr_to_paddr(addr)) == NOT_PADDR) { + ERRMSG("Can't convert a virtual address(%llx) to physical address.\n", + addr); + goto error; + } + break; + case PADDR: + paddr = addr; + break; + case VADDR_XEN: + if ((paddr = kvtop_xen(addr)) == 
NOT_PADDR) { + ERRMSG("Can't convert a virtual address(%llx) to machine address.\n", + addr); + goto error; + } + break; + default: + ERRMSG("Invalid address type (%d).\n", type_addr); + goto error; + } + + /* + * Read each page, because pages are not necessarily continuous. + * Ex) pages in vmalloc area + */ + read_size = MIN(info->page_size - PAGEOFFSET(paddr), size); + + pgaddr = PAGEBASE(paddr); + pgbuf = cache_search(pgaddr, read_size); + if (!pgbuf) { + ++cache_miss; + cached = cache_alloc(pgaddr); + if (!cached) + goto error; + pgbuf = cached->bufptr; + + if (info->flag_refiltering) { + if (!readpage_kdump_compressed(pgaddr, pgbuf)) + goto error_cached; + } else if (info->flag_sadump) { + if (!readpage_sadump(pgaddr, pgbuf)) + goto error_cached; + } else { + char *mapbuf = mappage_elf(pgaddr); + size_t mapoff; + + if (mapbuf) { + pgbuf = mapbuf; + mapoff = mapbuf - info->mmap_buf; + cached->paddr = pgaddr - mapoff; + cached->bufptr = info->mmap_buf; + cached->buflen = info->mmap_end_offset - + info->mmap_start_offset; + cached->discard = unmap_cache; + } else if (!readpage_elf(pgaddr, pgbuf)) + goto error_cached; + } + cache_add(cached); + } else + ++cache_hit; + + memcpy(bufptr, pgbuf + PAGEOFFSET(paddr), read_size); + + addr += read_size; + bufptr += read_size; + size -= read_size; + + if (size > 0) + goto next_page; + + return size_orig; + +error_cached: + cache_free(cached); +error: + ERRMSG("type_addr: %d, addr:%llx, size:%zd\n", type_addr, addr, size_orig); + return FALSE; +} + +int32_t +get_kernel_version(char *release) +{ + int32_t version; + long maj, min, rel; + char *start, *end; + + if (info->kernel_version) + return info->kernel_version; + + /* + * This method checks that vmlinux and vmcore are same kernel version. 
+ */ + start = release; + maj = strtol(start, &end, 10); + if (maj == LONG_MAX) + return FALSE; + + start = end + 1; + min = strtol(start, &end, 10); + if (min == LONG_MAX) + return FALSE; + + start = end + 1; + rel = strtol(start, &end, 10); + if (rel == LONG_MAX) + return FALSE; + + version = KERNEL_VERSION(maj, min, rel); + + if ((version < OLDEST_VERSION) || (LATEST_VERSION < version)) { + MSG("The kernel version is not supported.\n"); + MSG("The makedumpfile operation may be incomplete.\n"); + } + + return version; +} + +int +is_page_size(long page_size) +{ + /* + * Page size is restricted to a hamming weight of 1. + */ + if (page_size > 0 && !(page_size & (page_size - 1))) + return TRUE; + + return FALSE; +} + +int +set_page_size(long page_size) +{ + if (!is_page_size(page_size)) { + ERRMSG("Invalid page_size: %ld", page_size); + return FALSE; + } + info->page_size = page_size; + info->page_shift = ffs(info->page_size) - 1; + DEBUG_MSG("page_size : %ld\n", info->page_size); + + return TRUE; +} + +int +fallback_to_current_page_size(void) +{ + + if (!set_page_size(sysconf(_SC_PAGE_SIZE))) + return FALSE; + + DEBUG_MSG("WARNING: Cannot determine page size (no vmcoreinfo).\n"); + DEBUG_MSG("Using the dump kernel page size: %ld\n", + info->page_size); + + return TRUE; +} + +static int populate_kernel_version(void) +{ + struct utsname utsname; + + if (uname(&utsname)) { + ERRMSG("Cannot get name and information about current kernel : %s\n", + strerror(errno)); + return FALSE; + } + + info->kernel_version = get_kernel_version(utsname.release); + + return TRUE; +} + +int +check_release(void) +{ + unsigned long utsname; + + /* + * Get the kernel version. 
+ */ + if (SYMBOL(system_utsname) != NOT_FOUND_SYMBOL) { + utsname = SYMBOL(system_utsname); + } else if (SYMBOL(init_uts_ns) != NOT_FOUND_SYMBOL) { + utsname = SYMBOL(init_uts_ns) + sizeof(int); + } else { + ERRMSG("Can't get the symbol of system_utsname.\n"); + return FALSE; + } + if (!readmem(VADDR, utsname, &info->system_utsname, + sizeof(struct utsname))) { + ERRMSG("Can't get the address of system_utsname.\n"); + return FALSE; + } + + if (info->flag_read_vmcoreinfo) { + if (strcmp(info->system_utsname.release, info->release)) { + ERRMSG("%s and %s don't match.\n", + info->name_vmcoreinfo, info->name_memory); + retcd = WRONG_RELEASE; + return FALSE; + } + } + + info->kernel_version = get_kernel_version(info->system_utsname.release); + if (info->kernel_version == FALSE) { + ERRMSG("Can't get the kernel version.\n"); + return FALSE; + } + + return TRUE; +} + +int +open_vmcoreinfo(char *mode) +{ + FILE *file_vmcoreinfo; + + if ((file_vmcoreinfo = fopen(info->name_vmcoreinfo, mode)) == NULL) { + ERRMSG("Can't open the vmcoreinfo file(%s). %s\n", + info->name_vmcoreinfo, strerror(errno)); + return FALSE; + } + info->file_vmcoreinfo = file_vmcoreinfo; + return TRUE; +} + +int +open_kernel_file(void) +{ + int fd; + + if (info->name_vmlinux) { + if ((fd = open(info->name_vmlinux, O_RDONLY)) < 0) { + ERRMSG("Can't open the kernel file(%s). %s\n", + info->name_vmlinux, strerror(errno)); + return FALSE; + } + info->fd_vmlinux = fd; + } + if (info->name_xen_syms) { + if ((fd = open(info->name_xen_syms, O_RDONLY)) < 0) { + ERRMSG("Can't open the kernel file(%s). 
%s\n", + info->name_xen_syms, strerror(errno)); + return FALSE; + } + info->fd_xen_syms = fd; + } + return TRUE; +} + +int +check_kdump_compressed(char *filename) +{ + struct disk_dump_header dh; + + if (!__read_disk_dump_header(&dh, filename)) + return ERROR; + + if (strncmp(dh.signature, KDUMP_SIGNATURE, SIG_LEN)) + return FALSE; + + return TRUE; +} + +int +get_kdump_compressed_header_info(char *filename) +{ + struct disk_dump_header dh; + struct kdump_sub_header kh; + + if (!read_disk_dump_header(&dh, filename)) + return FALSE; + + if (!read_kdump_sub_header(&kh, filename)) + return FALSE; + + if (dh.header_version < 1) { + ERRMSG("header does not have dump_level member\n"); + return FALSE; + } + DEBUG_MSG("diskdump main header\n"); + DEBUG_MSG(" signature : %s\n", dh.signature); + DEBUG_MSG(" header_version : %d\n", dh.header_version); + DEBUG_MSG(" status : %d\n", dh.status); + DEBUG_MSG(" block_size : %d\n", dh.block_size); + DEBUG_MSG(" sub_hdr_size : %d\n", dh.sub_hdr_size); + DEBUG_MSG(" bitmap_blocks : %d\n", dh.bitmap_blocks); + DEBUG_MSG(" max_mapnr : 0x%x\n", dh.max_mapnr); + DEBUG_MSG(" total_ram_blocks : %d\n", dh.total_ram_blocks); + DEBUG_MSG(" device_blocks : %d\n", dh.device_blocks); + DEBUG_MSG(" written_blocks : %d\n", dh.written_blocks); + DEBUG_MSG(" current_cpu : %d\n", dh.current_cpu); + DEBUG_MSG(" nr_cpus : %d\n", dh.nr_cpus); + DEBUG_MSG("kdump sub header\n"); + DEBUG_MSG(" phys_base : 0x%lx\n", kh.phys_base); + DEBUG_MSG(" dump_level : %d\n", kh.dump_level); + DEBUG_MSG(" split : %d\n", kh.split); + DEBUG_MSG(" start_pfn : 0x%lx\n", kh.start_pfn); + DEBUG_MSG(" end_pfn : 0x%lx\n", kh.end_pfn); + if (dh.header_version >= 6) { + /* A dumpfile contains full 64bit values. 
*/ + DEBUG_MSG(" start_pfn_64 : 0x%llx\n", kh.start_pfn_64); + DEBUG_MSG(" end_pfn_64 : 0x%llx\n", kh.end_pfn_64); + DEBUG_MSG(" max_mapnr_64 : 0x%llx\n", kh.max_mapnr_64); + } + + info->dh_memory = malloc(sizeof(dh)); + if (info->dh_memory == NULL) { + ERRMSG("Can't allocate memory for the header. %s\n", + strerror(errno)); + return FALSE; + } + memcpy(info->dh_memory, &dh, sizeof(dh)); + memcpy(&info->timestamp, &dh.timestamp, sizeof(dh.timestamp)); + + info->kh_memory = malloc(sizeof(kh)); + if (info->kh_memory == NULL) { + ERRMSG("Can't allocate memory for the sub header. %s\n", + strerror(errno)); + goto error; + } + memcpy(info->kh_memory, &kh, sizeof(kh)); + set_nr_cpus(dh.nr_cpus); + + if (dh.header_version >= 3) { + /* A dumpfile contains vmcoreinfo data. */ + set_vmcoreinfo(kh.offset_vmcoreinfo, kh.size_vmcoreinfo); + DEBUG_MSG(" offset_vmcoreinfo: 0x%llx\n", + (unsigned long long)kh.offset_vmcoreinfo); + DEBUG_MSG(" size_vmcoreinfo : 0x%ld\n", kh.size_vmcoreinfo); + } + if (dh.header_version >= 4) { + /* A dumpfile contains ELF note section. */ + set_pt_note(kh.offset_note, kh.size_note); + DEBUG_MSG(" offset_note : 0x%llx\n", + (unsigned long long)kh.offset_note); + DEBUG_MSG(" size_note : 0x%ld\n", kh.size_note); + } + if (dh.header_version >= 5) { + /* A dumpfile contains erased information. */ + set_eraseinfo(kh.offset_eraseinfo, kh.size_eraseinfo); + DEBUG_MSG(" offset_eraseinfo : 0x%llx\n", + (unsigned long long)kh.offset_eraseinfo); + DEBUG_MSG(" size_eraseinfo : 0x%ld\n", kh.size_eraseinfo); + } + return TRUE; +error: + free(info->dh_memory); + info->dh_memory = NULL; + + return FALSE; +} + +int +open_dump_memory(void) +{ + int fd, status; + + if ((fd = open(info->name_memory, O_RDONLY)) < 0) { + ERRMSG("Can't open the dump memory(%s). 
%s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + info->fd_memory = fd; + + status = check_kdump_compressed(info->name_memory); + if (status == TRUE) { + info->flag_refiltering = TRUE; + return get_kdump_compressed_header_info(info->name_memory); + } + + status = check_and_get_sadump_header_info(info->name_memory); + if (status == TRUE) + return TRUE; + + if (status == ERROR) + return TRUE; + + return FALSE; +} + +int +open_dump_file(void) +{ + int fd; + int open_flags = O_RDWR|O_CREAT|O_TRUNC; + + if (!info->flag_force) + open_flags |= O_EXCL; + + if (info->flag_flatten) { + fd = STDOUT_FILENO; + info->name_dumpfile = filename_stdout; + } else if ((fd = open(info->name_dumpfile, open_flags, + S_IRUSR|S_IWUSR)) < 0) { + ERRMSG("Can't open the dump file(%s). %s\n", + info->name_dumpfile, strerror(errno)); + return FALSE; + } + info->fd_dumpfile = fd; + return TRUE; +} + +int +check_dump_file(const char *path) +{ + char *err_str; + + if (access(path, F_OK) != 0) + return TRUE; /* File does not exist */ + if (info->flag_force) { + if (access(path, W_OK) == 0) + return TRUE; /* We have write permission */ + err_str = strerror(errno); + } else { + err_str = strerror(EEXIST); + } + ERRMSG("Can't open the dump file (%s). %s\n", path, err_str); + return FALSE; +} + +int +open_dump_bitmap(void) +{ + int i, fd; + char *tmpname; + + /* Unnecessary to open */ + if (!info->working_dir && !info->flag_reassemble && !info->flag_refiltering + && !info->flag_sadump && !info->flag_mem_usage && info->flag_cyclic) + return TRUE; + + tmpname = getenv("TMPDIR"); + if (info->working_dir) + tmpname = info->working_dir; + else if (!tmpname) + tmpname = "/tmp"; + + if ((info->name_bitmap = (char *)malloc(sizeof(FILENAME_BITMAP) + + strlen(tmpname) + 1)) == NULL) { + ERRMSG("Can't allocate memory for the filename. 
%s\n", + strerror(errno)); + return FALSE; + } + strcpy(info->name_bitmap, tmpname); + strcat(info->name_bitmap, "/"); + strcat(info->name_bitmap, FILENAME_BITMAP); + if ((fd = mkstemp(info->name_bitmap)) < 0) { + ERRMSG("Can't open the bitmap file(%s). %s\n", + info->name_bitmap, strerror(errno)); + return FALSE; + } + info->fd_bitmap = fd; + + if (info->flag_split) { + /* + * Reserve file descriptors of bitmap for creating split + * dumpfiles by multiple processes, because a bitmap file will + * be unlinked just after this and it is not possible to open + * a bitmap file later. + */ + for (i = 0; i < info->num_dumpfile; i++) { + if ((fd = open(info->name_bitmap, O_RDONLY)) < 0) { + ERRMSG("Can't open the bitmap file(%s). %s\n", + info->name_bitmap, strerror(errno)); + return FALSE; + } + SPLITTING_FD_BITMAP(i) = fd; + } + } + + if (info->num_threads) { + /* + * Reserve file descriptors of bitmap for creating dumpfiles + * parallelly, because a bitmap file will be unlinked just after + * this and it is not possible to open a bitmap file later. + */ + for (i = 0; i < info->num_threads; i++) { + if ((fd = open(info->name_bitmap, O_RDONLY)) < 0) { + ERRMSG("Can't open the bitmap file(%s). %s\n", + info->name_bitmap, strerror(errno)); + return FALSE; + } + FD_BITMAP_PARALLEL(i) = fd; + } + } + + unlink(info->name_bitmap); + + return TRUE; +} + +/* + * Open the following files when it generates the vmcoreinfo file. + * - vmlinux + * - vmcoreinfo file + */ +int +open_files_for_generating_vmcoreinfo(void) +{ + if (!open_kernel_file()) + return FALSE; + + if (!open_vmcoreinfo("w")) + return FALSE; + + return TRUE; +} + +/* + * Open the following file when it rearranges the dump data. + * - dump file + */ +int +open_files_for_rearranging_dumpdata(void) +{ + if (!open_dump_file()) + return FALSE; + + return TRUE; +} + +/* + * Open the following files when it creates the dump file. 
 * - dump mem
 * - bit map
 * if it reads the vmcoreinfo file
 * - vmcoreinfo file
 * else
 * - vmlinux
 */
int
open_files_for_creating_dumpfile(void)
{
	if (info->flag_read_vmcoreinfo) {
		if (!open_vmcoreinfo("r"))
			return FALSE;
	} else {
		if (!open_kernel_file())
			return FALSE;
	}
	if (!open_dump_memory())
		return FALSE;

	return TRUE;
}

/*
 * Return non-zero iff @addr lies at or above the start of the kernel
 * virtual address space (KVBASE).
 */
int
is_kvaddr(unsigned long long addr)
{
	return (addr >= (unsigned long long)(KVBASE));
}

/*
 * Resolve the addresses of all kernel symbols makedumpfile may need,
 * via the SYMBOL_INIT* macros (unfound symbols become NOT_FOUND_SYMBOL).
 * Several entries are alternatives for the same information across
 * kernel versions (e.g. system_utsname vs. init_uts_ns,
 * init_level4_pgt vs. init_top_pgt).
 */
int
get_symbol_info(void)
{
	/*
	 * Get symbol info.
	 */
	SYMBOL_INIT(mem_map, "mem_map");
	SYMBOL_INIT(vmem_map, "vmem_map");
	SYMBOL_INIT(mem_section, "mem_section");
	SYMBOL_INIT(pkmap_count, "pkmap_count");
	SYMBOL_INIT_NEXT(pkmap_count_next, "pkmap_count");
	SYMBOL_INIT(system_utsname, "system_utsname");
	SYMBOL_INIT(init_uts_ns, "init_uts_ns");
	SYMBOL_INIT(_stext, "_stext");
	SYMBOL_INIT(swapper_pg_dir, "swapper_pg_dir");
	SYMBOL_INIT(init_level4_pgt, "init_level4_pgt");
	SYMBOL_INIT(level4_kernel_pgt, "level4_kernel_pgt");
	SYMBOL_INIT(init_top_pgt, "init_top_pgt");
	SYMBOL_INIT(vmlist, "vmlist");
	SYMBOL_INIT(vmap_area_list, "vmap_area_list");
	SYMBOL_INIT(node_online_map, "node_online_map");
	SYMBOL_INIT(node_states, "node_states");
	SYMBOL_INIT(node_memblk, "node_memblk");
	SYMBOL_INIT(node_data, "node_data");
	SYMBOL_INIT(pgdat_list, "pgdat_list");
	SYMBOL_INIT(contig_page_data, "contig_page_data");
	SYMBOL_INIT(log_buf, "log_buf");
	SYMBOL_INIT(log_buf_len, "log_buf_len");
	SYMBOL_INIT(log_end, "log_end");
	SYMBOL_INIT(log_first_idx, "log_first_idx");
	SYMBOL_INIT(clear_idx, "clear_idx");
	SYMBOL_INIT(log_next_idx, "log_next_idx");
	SYMBOL_INIT(max_pfn, "max_pfn");
	SYMBOL_INIT(modules, "modules");
	SYMBOL_INIT(high_memory, "high_memory");
	SYMBOL_INIT(linux_banner, "linux_banner");
	SYMBOL_INIT(bios_cpu_apicid, "bios_cpu_apicid");
	SYMBOL_INIT(x86_bios_cpu_apicid, "x86_bios_cpu_apicid");
	/* Older kernels expose this as a per-cpu symbol instead. */
	if (SYMBOL(x86_bios_cpu_apicid) == NOT_FOUND_SYMBOL)
		
SYMBOL_INIT(x86_bios_cpu_apicid,
			    "per_cpu__x86_bios_cpu_apicid");
	SYMBOL_INIT(x86_bios_cpu_apicid_early_ptr,
		    "x86_bios_cpu_apicid_early_ptr");
	SYMBOL_INIT(x86_bios_cpu_apicid_early_map,
		    "x86_bios_cpu_apicid_early_map");
	SYMBOL_INIT(crash_notes, "crash_notes");
	SYMBOL_INIT(__per_cpu_load, "__per_cpu_load");
	SYMBOL_INIT(__per_cpu_offset, "__per_cpu_offset");
	SYMBOL_INIT(cpu_online_mask, "cpu_online_mask");
	SYMBOL_INIT(__cpu_online_mask, "__cpu_online_mask");
	/* The online-CPU mask was renamed over time:
	 * cpu_online_map -> cpu_online_mask -> __cpu_online_mask. */
	if (SYMBOL(cpu_online_mask) == NOT_FOUND_SYMBOL) {
		if (SYMBOL(__cpu_online_mask) == NOT_FOUND_SYMBOL)
			SYMBOL_INIT(cpu_online_mask, "cpu_online_map");
		else
			SYMBOL_INIT(cpu_online_mask, "__cpu_online_mask");
	}
	SYMBOL_INIT(kexec_crash_image, "kexec_crash_image");
	SYMBOL_INIT(node_remap_start_vaddr, "node_remap_start_vaddr");
	SYMBOL_INIT(node_remap_end_vaddr, "node_remap_end_vaddr");
	SYMBOL_INIT(node_remap_start_pfn, "node_remap_start_pfn");

	/* For array symbols that were found, also record their type and
	 * length so they can be indexed later. */
	if (SYMBOL(node_data) != NOT_FOUND_SYMBOL)
		SYMBOL_ARRAY_TYPE_INIT(node_data, "node_data");
	if (SYMBOL(pgdat_list) != NOT_FOUND_SYMBOL)
		SYMBOL_ARRAY_LENGTH_INIT(pgdat_list, "pgdat_list");
	if (SYMBOL(mem_section) != NOT_FOUND_SYMBOL)
		SYMBOL_ARRAY_LENGTH_INIT(mem_section, "mem_section");
	if (SYMBOL(node_memblk) != NOT_FOUND_SYMBOL)
		SYMBOL_ARRAY_LENGTH_INIT(node_memblk, "node_memblk");
	if (SYMBOL(__per_cpu_offset) != NOT_FOUND_SYMBOL)
		SYMBOL_ARRAY_LENGTH_INIT(__per_cpu_offset, "__per_cpu_offset");
	if (SYMBOL(node_remap_start_pfn) != NOT_FOUND_SYMBOL)
		SYMBOL_ARRAY_LENGTH_INIT(node_remap_start_pfn,
					"node_remap_start_pfn");

	SYMBOL_INIT(vmemmap_list, "vmemmap_list");
	SYMBOL_INIT(mmu_psize_defs, "mmu_psize_defs");
	SYMBOL_INIT(mmu_vmemmap_psize, "mmu_vmemmap_psize");
	SYMBOL_INIT(free_huge_page, "free_huge_page");

	SYMBOL_INIT(cpu_pgd, "cpu_pgd");
	SYMBOL_INIT(demote_segment_4k, "demote_segment_4k");
	SYMBOL_INIT(cur_cpu_spec, "cur_cpu_spec");

	SYMBOL_INIT(divide_error, "divide_error");
	SYMBOL_INIT(idt_table, "idt_table");
	SYMBOL_INIT(saved_command_line, "saved_command_line");
	SYMBOL_INIT(pti_init, "pti_init");
	SYMBOL_INIT(kaiser_init, "kaiser_init");

	return TRUE;
}

/*
 * Resolve structure sizes, member offsets, array lengths and enum
 * values from the debuginfo, with version fallbacks where member names
 * changed across kernel releases.
 */
int
get_structure_info(void)
{
	/*
	 * Get offsets of the page descriptor's members.
	 */
	SIZE_INIT(page, "page");
	OFFSET_INIT(page.flags, "page", "flags");
	OFFSET_INIT(page._refcount, "page", "_refcount");
	/* page._count was renamed to page._refcount in kernel 4.6;
	 * flag_use_count records which name this kernel uses. */
	if (OFFSET(page._refcount) == NOT_FOUND_STRUCTURE) {
		info->flag_use_count = TRUE;
		OFFSET_INIT(page._refcount, "page", "_count");
	} else {
		info->flag_use_count = FALSE;
	}

	OFFSET_INIT(page.mapping, "page", "mapping");
	OFFSET_INIT(page._mapcount, "page", "_mapcount");
	OFFSET_INIT(page.private, "page", "private");
	OFFSET_INIT(page.compound_dtor, "page", "compound_dtor");
	OFFSET_INIT(page.compound_order, "page", "compound_order");
	OFFSET_INIT(page.compound_head, "page", "compound_head");

	/*
	 * Some vmlinux(s) don't have debugging information about
	 * page.mapping. Then, makedumpfile assumes that there is
	 * "mapping" next to "private(unsigned long)" in the first
	 * union.
	 */
	if (OFFSET(page.mapping) == NOT_FOUND_STRUCTURE) {
		OFFSET(page.mapping) = get_member_offset("page", NULL,
		    DWARF_INFO_GET_MEMBER_OFFSET_1ST_UNION);
		if (OFFSET(page.mapping) == FAILED_DWARFINFO)
			return FALSE;
		if (OFFSET(page.mapping) != NOT_FOUND_STRUCTURE)
			OFFSET(page.mapping) += sizeof(unsigned long);
	}

	OFFSET_INIT(page.lru, "page", "lru");

	/*
	 * Get offsets of the mem_section's members.
	 */
	SIZE_INIT(mem_section, "mem_section");
	OFFSET_INIT(mem_section.section_mem_map, "mem_section",
	    "section_mem_map");

	/*
	 * Get offsets of the pglist_data's members. 
	 */
	SIZE_INIT(pglist_data, "pglist_data");
	OFFSET_INIT(pglist_data.node_zones, "pglist_data", "node_zones");
	OFFSET_INIT(pglist_data.nr_zones, "pglist_data", "nr_zones");
	OFFSET_INIT(pglist_data.node_mem_map, "pglist_data", "node_mem_map");
	OFFSET_INIT(pglist_data.node_start_pfn, "pglist_data","node_start_pfn");
	OFFSET_INIT(pglist_data.node_spanned_pages, "pglist_data",
	    "node_spanned_pages");
	OFFSET_INIT(pglist_data.pgdat_next, "pglist_data", "pgdat_next");

	/*
	 * Get offsets of the zone's members.
	 */
	SIZE_INIT(zone, "zone");
	OFFSET_INIT(zone.free_pages, "zone", "free_pages");
	OFFSET_INIT(zone.free_area, "zone", "free_area");
	OFFSET_INIT(zone.vm_stat, "zone", "vm_stat");
	OFFSET_INIT(zone.spanned_pages, "zone", "spanned_pages");
	MEMBER_ARRAY_LENGTH_INIT(zone.free_area, "zone", "free_area");

	/*
	 * Get offsets of the free_area's members.
	 */
	SIZE_INIT(free_area, "free_area");
	OFFSET_INIT(free_area.free_list, "free_area", "free_list");
	MEMBER_ARRAY_LENGTH_INIT(free_area.free_list, "free_area", "free_list");

	/*
	 * Get offsets of the list_head's members.
	 */
	SIZE_INIT(list_head, "list_head");
	OFFSET_INIT(list_head.next, "list_head", "next");
	OFFSET_INIT(list_head.prev, "list_head", "prev");

	/*
	 * Get offsets of the node_memblk_s's members.
	 */
	SIZE_INIT(node_memblk_s, "node_memblk_s");
	OFFSET_INIT(node_memblk_s.start_paddr, "node_memblk_s", "start_paddr");
	OFFSET_INIT(node_memblk_s.size, "node_memblk_s", "size");
	OFFSET_INIT(node_memblk_s.nid, "node_memblk_s", "nid");

	OFFSET_INIT(vm_struct.addr, "vm_struct", "addr");
	OFFSET_INIT(vmap_area.va_start, "vmap_area", "va_start");
	OFFSET_INIT(vmap_area.list, "vmap_area", "list");

	/*
	 * Get offset of the module members.
	 */
	SIZE_INIT(module, "module");
	OFFSET_INIT(module.strtab, "module", "strtab");
	OFFSET_INIT(module.symtab, "module", "symtab");
	OFFSET_INIT(module.num_symtab, "module", "num_symtab");
	OFFSET_INIT(module.list, "module", "list");
	OFFSET_INIT(module.name, "module", "name");
	/* In kernel 4.5 the core/init fields moved into an embedded
	 * struct module_layout; the fallbacks below compose the offset
	 * of the layout inside module with the member inside the layout. */
	OFFSET_INIT(module.module_core, "module", "module_core");
	if (OFFSET(module.module_core) == NOT_FOUND_STRUCTURE) {
		/* for kernel version 4.5 and above */
		long core_layout;

		OFFSET_INIT(module.module_core, "module", "core_layout");
		core_layout = OFFSET(module.module_core);
		OFFSET_INIT(module.module_core, "module_layout", "base");
		OFFSET(module.module_core) += core_layout;
	}
	OFFSET_INIT(module.core_size, "module", "core_size");
	if (OFFSET(module.core_size) == NOT_FOUND_STRUCTURE) {
		/* for kernel version 4.5 and above */
		long core_layout;

		OFFSET_INIT(module.core_size, "module", "core_layout");
		core_layout = OFFSET(module.core_size);
		OFFSET_INIT(module.core_size, "module_layout", "size");
		OFFSET(module.core_size) += core_layout;
	}
	OFFSET_INIT(module.module_init, "module", "module_init");
	if (OFFSET(module.module_init) == NOT_FOUND_STRUCTURE) {
		/* for kernel version 4.5 and above */
		long init_layout;

		OFFSET_INIT(module.module_init, "module", "init_layout");
		init_layout = OFFSET(module.module_init);
		OFFSET_INIT(module.module_init, "module_layout", "base");
		OFFSET(module.module_init) += init_layout;
	}
	OFFSET_INIT(module.init_size, "module", "init_size");
	if (OFFSET(module.init_size) == NOT_FOUND_STRUCTURE) {
		/* for kernel version 4.5 and above */
		long init_layout;

		OFFSET_INIT(module.init_size, "module", "init_layout");
		init_layout = OFFSET(module.init_size);
		OFFSET_INIT(module.init_size, "module_layout", "size");
		OFFSET(module.init_size) += init_layout;
	}

	ENUM_NUMBER_INIT(NR_FREE_PAGES, "NR_FREE_PAGES");
	ENUM_NUMBER_INIT(N_ONLINE, "N_ONLINE");
	ENUM_NUMBER_INIT(pgtable_l5_enabled, "pgtable_l5_enabled");

	ENUM_NUMBER_INIT(PG_lru, "PG_lru");
	ENUM_NUMBER_INIT(PG_private, "PG_private");
	ENUM_NUMBER_INIT(PG_swapcache, "PG_swapcache");
	ENUM_NUMBER_INIT(PG_swapbacked, "PG_swapbacked");
	ENUM_NUMBER_INIT(PG_buddy, "PG_buddy");
	ENUM_NUMBER_INIT(PG_slab, "PG_slab");
	ENUM_NUMBER_INIT(PG_hwpoison, "PG_hwpoison");

	/* PG_head_mask is preferred; fall back to deriving it from
	 * PG_head (or the older PG_compound) when absent. */
	ENUM_NUMBER_INIT(PG_head_mask, "PG_head_mask");
	if (NUMBER(PG_head_mask) == NOT_FOUND_NUMBER) {
		ENUM_NUMBER_INIT(PG_head, "PG_head");
		if (NUMBER(PG_head) == NOT_FOUND_NUMBER)
			ENUM_NUMBER_INIT(PG_head, "PG_compound");
		if (NUMBER(PG_head) != NOT_FOUND_NUMBER)
			NUMBER(PG_head_mask) = 1UL << NUMBER(PG_head);
	}

	ENUM_NUMBER_INIT(HUGETLB_PAGE_DTOR, "HUGETLB_PAGE_DTOR");

	ENUM_TYPE_SIZE_INIT(pageflags, "pageflags");

	TYPEDEF_SIZE_INIT(nodemask_t, "nodemask_t");

	SIZE_INIT(percpu_data, "percpu_data");

	/*
	 * Get offset of the elf_prstatus members.
	 */
	SIZE_INIT(elf_prstatus, "elf_prstatus");
	OFFSET_INIT(elf_prstatus.pr_reg, "elf_prstatus", "pr_reg");

	/*
	 * Get size of cpumask and cpumask_t.
	 */
	SIZE_INIT(cpumask, "cpumask");

	TYPEDEF_SIZE_INIT(cpumask_t, "cpumask_t");

	/*
	 * Get offset of the user_regs_struct members. 
	 */
	SIZE_INIT(user_regs_struct, "user_regs_struct");

#ifdef __x86__
	if (SIZE(user_regs_struct) != NOT_FOUND_STRUCTURE) {
		OFFSET_INIT(user_regs_struct.bx, "user_regs_struct", "bx");
		OFFSET_INIT(user_regs_struct.cx, "user_regs_struct", "cx");
		OFFSET_INIT(user_regs_struct.dx, "user_regs_struct", "dx");
		OFFSET_INIT(user_regs_struct.si, "user_regs_struct", "si");
		OFFSET_INIT(user_regs_struct.di, "user_regs_struct", "di");
		OFFSET_INIT(user_regs_struct.bp, "user_regs_struct", "bp");
		OFFSET_INIT(user_regs_struct.ax, "user_regs_struct", "ax");
		OFFSET_INIT(user_regs_struct.ds, "user_regs_struct", "ds");
		OFFSET_INIT(user_regs_struct.es, "user_regs_struct", "es");
		OFFSET_INIT(user_regs_struct.fs, "user_regs_struct", "fs");
		OFFSET_INIT(user_regs_struct.gs, "user_regs_struct", "gs");
		OFFSET_INIT(user_regs_struct.orig_ax, "user_regs_struct",
		    "orig_ax");
		OFFSET_INIT(user_regs_struct.ip, "user_regs_struct", "ip");
		OFFSET_INIT(user_regs_struct.cs, "user_regs_struct", "cs");
		OFFSET_INIT(user_regs_struct.flags, "user_regs_struct",
		    "flags");
		OFFSET_INIT(user_regs_struct.sp, "user_regs_struct", "sp");
		OFFSET_INIT(user_regs_struct.ss, "user_regs_struct", "ss");

		/* Pre-2.6.27 kernels use the e-prefixed register names. */
		if (OFFSET(user_regs_struct.bx) == NOT_FOUND_STRUCTURE)
			OFFSET_INIT(user_regs_struct.bx, "user_regs_struct", "ebx");
		if (OFFSET(user_regs_struct.cx) == NOT_FOUND_STRUCTURE)
			OFFSET_INIT(user_regs_struct.cx, "user_regs_struct", "ecx");
		if (OFFSET(user_regs_struct.dx) == NOT_FOUND_STRUCTURE)
			OFFSET_INIT(user_regs_struct.dx, "user_regs_struct", "edx");
		if (OFFSET(user_regs_struct.si) == NOT_FOUND_STRUCTURE)
			OFFSET_INIT(user_regs_struct.si, "user_regs_struct", "esi");
		if (OFFSET(user_regs_struct.di) == NOT_FOUND_STRUCTURE)
			OFFSET_INIT(user_regs_struct.di, "user_regs_struct", "edi");
		if (OFFSET(user_regs_struct.bp) == NOT_FOUND_STRUCTURE)
			OFFSET_INIT(user_regs_struct.bp, "user_regs_struct", "ebp");
		if (OFFSET(user_regs_struct.ax) == NOT_FOUND_STRUCTURE)
			OFFSET_INIT(user_regs_struct.ax, "user_regs_struct", "eax");
		if (OFFSET(user_regs_struct.orig_ax) == NOT_FOUND_STRUCTURE)
			OFFSET_INIT(user_regs_struct.orig_ax, "user_regs_struct", "orig_eax");
		if (OFFSET(user_regs_struct.ip) == NOT_FOUND_STRUCTURE)
			OFFSET_INIT(user_regs_struct.ip, "user_regs_struct", "eip");
		if (OFFSET(user_regs_struct.flags) == NOT_FOUND_STRUCTURE)
			OFFSET_INIT(user_regs_struct.flags, "user_regs_struct", "eflags");
		if (OFFSET(user_regs_struct.sp) == NOT_FOUND_STRUCTURE)
			OFFSET_INIT(user_regs_struct.sp, "user_regs_struct", "esp");
	} else {
		/*
		 * Note: Sometimes kernel debuginfo doesn't contain
		 * user_regs_struct structure information. Instead, we
		 * take offsets from actual datatype.
		 */
		OFFSET(user_regs_struct.bx) = offsetof(struct user_regs_struct, bx);
		OFFSET(user_regs_struct.cx) = offsetof(struct user_regs_struct, cx);
		OFFSET(user_regs_struct.dx) = offsetof(struct user_regs_struct, dx);
		OFFSET(user_regs_struct.si) = offsetof(struct user_regs_struct, si);
		OFFSET(user_regs_struct.di) = offsetof(struct user_regs_struct, di);
		OFFSET(user_regs_struct.bp) = offsetof(struct user_regs_struct, bp);
		OFFSET(user_regs_struct.ax) = offsetof(struct user_regs_struct, ax);
		OFFSET(user_regs_struct.ds) = offsetof(struct user_regs_struct, ds);
		OFFSET(user_regs_struct.es) = offsetof(struct user_regs_struct, es);
		OFFSET(user_regs_struct.fs) = offsetof(struct user_regs_struct, fs);
		OFFSET(user_regs_struct.gs) = offsetof(struct user_regs_struct, gs);
		OFFSET(user_regs_struct.orig_ax) = offsetof(struct user_regs_struct, orig_ax);
		OFFSET(user_regs_struct.ip) = offsetof(struct user_regs_struct, ip);
		OFFSET(user_regs_struct.cs) = offsetof(struct user_regs_struct, cs);
		OFFSET(user_regs_struct.flags) = offsetof(struct user_regs_struct, flags);
		OFFSET(user_regs_struct.sp) = offsetof(struct user_regs_struct, sp);
		OFFSET(user_regs_struct.ss) = offsetof(struct user_regs_struct, ss);
	}
#endif /* __x86__ */

#ifdef __x86_64__
	if (SIZE(user_regs_struct) != NOT_FOUND_STRUCTURE) {
		OFFSET_INIT(user_regs_struct.r15, "user_regs_struct", "r15");
		OFFSET_INIT(user_regs_struct.r14, "user_regs_struct", "r14");
		OFFSET_INIT(user_regs_struct.r13, "user_regs_struct", "r13");
		OFFSET_INIT(user_regs_struct.r12, "user_regs_struct", "r12");
		OFFSET_INIT(user_regs_struct.bp, "user_regs_struct", "bp");
		OFFSET_INIT(user_regs_struct.bx, "user_regs_struct", "bx");
		OFFSET_INIT(user_regs_struct.r11, "user_regs_struct", "r11");
		OFFSET_INIT(user_regs_struct.r10, "user_regs_struct", "r10");
		OFFSET_INIT(user_regs_struct.r9, "user_regs_struct", "r9");
		OFFSET_INIT(user_regs_struct.r8, "user_regs_struct", "r8");
		OFFSET_INIT(user_regs_struct.ax, "user_regs_struct", "ax");
		OFFSET_INIT(user_regs_struct.cx, "user_regs_struct", "cx");
		OFFSET_INIT(user_regs_struct.dx, "user_regs_struct", "dx");
		OFFSET_INIT(user_regs_struct.si, "user_regs_struct", "si");
		OFFSET_INIT(user_regs_struct.di, "user_regs_struct", "di");
		OFFSET_INIT(user_regs_struct.orig_ax, "user_regs_struct",
		    "orig_ax");
		OFFSET_INIT(user_regs_struct.ip, "user_regs_struct", "ip");
		OFFSET_INIT(user_regs_struct.cs, "user_regs_struct", "cs");
		OFFSET_INIT(user_regs_struct.flags, "user_regs_struct",
		    "flags");
		OFFSET_INIT(user_regs_struct.sp, "user_regs_struct", "sp");
		OFFSET_INIT(user_regs_struct.ss, "user_regs_struct", "ss");
		OFFSET_INIT(user_regs_struct.fs_base, "user_regs_struct",
		    "fs_base");
		OFFSET_INIT(user_regs_struct.gs_base, "user_regs_struct",
		    "gs_base");
		OFFSET_INIT(user_regs_struct.ds, "user_regs_struct", "ds");
		OFFSET_INIT(user_regs_struct.es, "user_regs_struct", "es");
		OFFSET_INIT(user_regs_struct.fs, "user_regs_struct", "fs");
		OFFSET_INIT(user_regs_struct.gs, "user_regs_struct", "gs");
	} else {
		/*
		 * Note: Sometimes kernel debuginfo doesn't contain
		 * user_regs_struct structure information. Instead, we
		 * take offsets from actual datatype.
		 */
		OFFSET(user_regs_struct.r15) = offsetof(struct user_regs_struct, r15);
		OFFSET(user_regs_struct.r14) = offsetof(struct user_regs_struct, r14);
		OFFSET(user_regs_struct.r13) = offsetof(struct user_regs_struct, r13);
		OFFSET(user_regs_struct.r12) = offsetof(struct user_regs_struct, r12);
		OFFSET(user_regs_struct.bp) = offsetof(struct user_regs_struct, bp);
		OFFSET(user_regs_struct.bx) = offsetof(struct user_regs_struct, bx);
		OFFSET(user_regs_struct.r11) = offsetof(struct user_regs_struct, r11);
		OFFSET(user_regs_struct.r10) = offsetof(struct user_regs_struct, r10);
		OFFSET(user_regs_struct.r9) = offsetof(struct user_regs_struct, r9);
		OFFSET(user_regs_struct.r8) = offsetof(struct user_regs_struct, r8);
		OFFSET(user_regs_struct.ax) = offsetof(struct user_regs_struct, ax);
		OFFSET(user_regs_struct.cx) = offsetof(struct user_regs_struct, cx);
		OFFSET(user_regs_struct.dx) = offsetof(struct user_regs_struct, dx);
		OFFSET(user_regs_struct.si) = offsetof(struct user_regs_struct, si);
		OFFSET(user_regs_struct.di) = offsetof(struct user_regs_struct, di);
		OFFSET(user_regs_struct.orig_ax) = offsetof(struct user_regs_struct, orig_ax);
		OFFSET(user_regs_struct.ip) = offsetof(struct user_regs_struct, ip);
		OFFSET(user_regs_struct.cs) = offsetof(struct user_regs_struct, cs);
		OFFSET(user_regs_struct.flags) = offsetof(struct user_regs_struct, flags);
		OFFSET(user_regs_struct.sp) = offsetof(struct user_regs_struct, sp);
		OFFSET(user_regs_struct.ss) = offsetof(struct user_regs_struct, ss);
		OFFSET(user_regs_struct.fs_base) = offsetof(struct user_regs_struct, fs_base);
		OFFSET(user_regs_struct.gs_base) = offsetof(struct user_regs_struct, gs_base);
		OFFSET(user_regs_struct.ds) = offsetof(struct user_regs_struct, ds);
		OFFSET(user_regs_struct.es) = offsetof(struct user_regs_struct, es);
		OFFSET(user_regs_struct.fs) = offsetof(struct user_regs_struct, fs);
		OFFSET(user_regs_struct.gs) = offsetof(struct user_regs_struct, gs);
	}
#endif /* __x86_64__ */

	/* kexec/kdump image layout, used to locate the crash kernel. */
	OFFSET_INIT(kimage.segment, "kimage", "segment");

	MEMBER_ARRAY_LENGTH_INIT(kimage.segment, "kimage", "segment");

	SIZE_INIT(kexec_segment, "kexec_segment");
	OFFSET_INIT(kexec_segment.mem, "kexec_segment", "mem");

	OFFSET_INIT(elf64_hdr.e_phnum, "elf64_hdr", "e_phnum");
	OFFSET_INIT(elf64_hdr.e_phentsize, "elf64_hdr", "e_phentsize");
	OFFSET_INIT(elf64_hdr.e_phoff, "elf64_hdr", "e_phoff");

	SIZE_INIT(elf64_hdr, "elf64_hdr");
	OFFSET_INIT(elf64_phdr.p_type, "elf64_phdr", "p_type");
	OFFSET_INIT(elf64_phdr.p_offset, "elf64_phdr", "p_offset");
	OFFSET_INIT(elf64_phdr.p_paddr, "elf64_phdr", "p_paddr");
	OFFSET_INIT(elf64_phdr.p_memsz, "elf64_phdr", "p_memsz");

	SIZE_INIT(printk_log, "printk_log");
	if (SIZE(printk_log) != NOT_FOUND_STRUCTURE) {
		/*
		 * In kernel 3.11-rc4 the log structure name was renamed
		 * to "printk_log".
		 */
		info->flag_use_printk_log = TRUE;
		OFFSET_INIT(printk_log.ts_nsec, "printk_log", "ts_nsec");
		OFFSET_INIT(printk_log.len, "printk_log", "len");
		OFFSET_INIT(printk_log.text_len, "printk_log", "text_len");
	} else {
		info->flag_use_printk_log = FALSE;
		SIZE_INIT(printk_log, "log");
		OFFSET_INIT(printk_log.ts_nsec, "log", "ts_nsec");
		OFFSET_INIT(printk_log.len, "log", "len");
		OFFSET_INIT(printk_log.text_len, "log", "text_len");
	}

	/*
	 * Get offsets of the vmemmap_backing's members.
	 */
	SIZE_INIT(vmemmap_backing, "vmemmap_backing");
	OFFSET_INIT(vmemmap_backing.phys, "vmemmap_backing", "phys");
	OFFSET_INIT(vmemmap_backing.virt_addr, "vmemmap_backing", "virt_addr");
	OFFSET_INIT(vmemmap_backing.list, "vmemmap_backing", "list");

	/*
	 * Get offsets of the mmu_psize_def's members.
	 */
	SIZE_INIT(mmu_psize_def, "mmu_psize_def");
	OFFSET_INIT(mmu_psize_def.shift, "mmu_psize_def", "shift");

	/*
	 * Get offsets of the cpu_spec's members. 
	 */
	SIZE_INIT(cpu_spec, "cpu_spec");
	OFFSET_INIT(cpu_spec.mmu_features, "cpu_spec", "mmu_features");

	return TRUE;
}

/*
 * Record the source file of types whose definition location matters
 * (currently only pud_t).
 */
int
get_srcfile_info(void)
{
	TYPEDEF_SRCFILE_INIT(pud_t, "pud_t");

	return TRUE;
}

/*
 * Fill in hard-coded fallback values for old kernels whose debuginfo
 * did not provide the PG_* flag numbers or related constants.
 */
int
get_value_for_old_linux(void)
{
	if (NUMBER(PG_lru) == NOT_FOUND_NUMBER)
		NUMBER(PG_lru) = PG_lru_ORIGINAL;
	if (NUMBER(PG_private) == NOT_FOUND_NUMBER)
		NUMBER(PG_private) = PG_private_ORIGINAL;
	if (NUMBER(PG_swapcache) == NOT_FOUND_NUMBER)
		NUMBER(PG_swapcache) = PG_swapcache_ORIGINAL;
	/* NOTE(review): PG_private + 6 is a hard-coded layout guess for
	 * kernels missing PG_swapbacked; verify against the kernel
	 * versions this branch targets. */
	if (NUMBER(PG_swapbacked) == NOT_FOUND_NUMBER
	    && NUMBER(PG_swapcache) < NUMBER(PG_private))
		NUMBER(PG_swapbacked) = NUMBER(PG_private) + 6;
	if (NUMBER(PG_slab) == NOT_FOUND_NUMBER)
		NUMBER(PG_slab) = PG_slab_ORIGINAL;
	if (NUMBER(PG_head_mask) == NOT_FOUND_NUMBER)
		NUMBER(PG_head_mask) = 1L << PG_compound_ORIGINAL;

	/*
	 * The values from here are for free page filtering based on
	 * mem_map array. These are minimum effort to cover old
	 * kernels.
	 *
	 * The logic also needs offset values for some members of page
	 * structure. But it much depends on kernel versions. We avoid
	 * to hard code the values.
	 */
	if (NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE) == NOT_FOUND_NUMBER) {
		if (info->kernel_version == KERNEL_VERSION(2, 6, 38))
			NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE) =
			    PAGE_BUDDY_MAPCOUNT_VALUE_v2_6_38;
		if (info->kernel_version >= KERNEL_VERSION(2, 6, 39))
			NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE) =
			    PAGE_BUDDY_MAPCOUNT_VALUE_v2_6_39_to_latest_version;
	}
	if (SIZE(pageflags) == NOT_FOUND_STRUCTURE) {
		if (info->kernel_version >= KERNEL_VERSION(2, 6, 27))
			SIZE(pageflags) =
			    PAGE_FLAGS_SIZE_v2_6_27_to_latest_version;
	}
	return TRUE;
}

/*
 * Read the 1st kernel's utsname straight out of the vmlinux file
 * (system_utsname, or init_uts_ns on newer kernels) and copy its
 * release string into info->release.
 */
int
get_str_osrelease_from_vmlinux(void)
{
	int fd;
	char *name;
	struct utsname system_utsname;
	unsigned long long utsname;
	off_t offset;
	const off_t failed = (off_t)-1;

	/*
	 * Get the kernel version. 
+ */ + if (SYMBOL(system_utsname) != NOT_FOUND_SYMBOL) { + utsname = SYMBOL(system_utsname); + } else if (SYMBOL(init_uts_ns) != NOT_FOUND_SYMBOL) { + utsname = SYMBOL(init_uts_ns) + sizeof(int); + } else { + ERRMSG("Can't get the symbol of system_utsname.\n"); + return FALSE; + } + get_fileinfo_of_debuginfo(&fd, &name); + + offset = vaddr_to_offset_slow(fd, name, utsname); + if (!offset) { + ERRMSG("Can't convert vaddr (%llx) of utsname to an offset.\n", + utsname); + return FALSE; + } + if (lseek(fd, offset, SEEK_SET) == failed) { + ERRMSG("Can't seek %s. %s\n", name, strerror(errno)); + return FALSE; + } + if (read(fd, &system_utsname, sizeof system_utsname) + != sizeof system_utsname) { + ERRMSG("Can't read %s. %s\n", name, strerror(errno)); + return FALSE; + } + if (!strncpy(info->release, system_utsname.release, STRLEN_OSRELEASE)){ + ERRMSG("Can't do strncpy for osrelease."); + return FALSE; + } + return TRUE; +} + +int +is_sparsemem_extreme(void) +{ + if ((ARRAY_LENGTH(mem_section) + == divideup(NR_MEM_SECTIONS(), _SECTIONS_PER_ROOT_EXTREME())) + || (ARRAY_LENGTH(mem_section) == NOT_FOUND_SYMBOL)) + return TRUE; + else + return FALSE; +} + +int +get_mem_type(void) +{ + int ret; + + if ((SIZE(page) == NOT_FOUND_STRUCTURE) + || (OFFSET(page.flags) == NOT_FOUND_STRUCTURE) + || (OFFSET(page._refcount) == NOT_FOUND_STRUCTURE) + || (OFFSET(page.mapping) == NOT_FOUND_STRUCTURE)) { + ret = NOT_FOUND_MEMTYPE; + } else if ((((SYMBOL(node_data) != NOT_FOUND_SYMBOL) + && (ARRAY_LENGTH(node_data) != NOT_FOUND_STRUCTURE)) + || ((SYMBOL(pgdat_list) != NOT_FOUND_SYMBOL) + && (OFFSET(pglist_data.pgdat_next) != NOT_FOUND_STRUCTURE)) + || ((SYMBOL(pgdat_list) != NOT_FOUND_SYMBOL) + && (ARRAY_LENGTH(pgdat_list) != NOT_FOUND_STRUCTURE))) + && (SIZE(pglist_data) != NOT_FOUND_STRUCTURE) + && (OFFSET(pglist_data.node_mem_map) != NOT_FOUND_STRUCTURE) + && (OFFSET(pglist_data.node_start_pfn) != NOT_FOUND_STRUCTURE) + && (OFFSET(pglist_data.node_spanned_pages) !=NOT_FOUND_STRUCTURE)){ 
		ret = DISCONTIGMEM;
	} else if ((SYMBOL(mem_section) != NOT_FOUND_SYMBOL)
	    && (SIZE(mem_section) != NOT_FOUND_STRUCTURE)
	    && (OFFSET(mem_section.section_mem_map) != NOT_FOUND_STRUCTURE)) {
		if (is_sparsemem_extreme())
			ret = SPARSEMEM_EX;
		else
			ret = SPARSEMEM;
	} else if (SYMBOL(mem_map) != NOT_FOUND_SYMBOL) {
		ret = FLATMEM;
	} else {
		ret = NOT_FOUND_MEMTYPE;
	}

	return ret;
}

/*
 * Emit everything collected from the 1st kernel (release, page size,
 * symbols, sizes, offsets, array lengths, numbers, source files) into
 * info->file_vmcoreinfo via the WRITE_* macros, so a later run can
 * read it back instead of needing vmlinux.
 */
void
write_vmcoreinfo_data(void)
{
	/*
	 * write 1st kernel's OSRELEASE
	 */
	fprintf(info->file_vmcoreinfo, "%s%s\n", STR_OSRELEASE,
	    info->release);

	/*
	 * write 1st kernel's PAGESIZE
	 */
	fprintf(info->file_vmcoreinfo, "%s%ld\n", STR_PAGESIZE,
	    info->page_size);

	/*
	 * write the symbol of 1st kernel
	 */
	WRITE_SYMBOL("mem_map", mem_map);
	WRITE_SYMBOL("vmem_map", vmem_map);
	WRITE_SYMBOL("mem_section", mem_section);
	WRITE_SYMBOL("pkmap_count", pkmap_count);
	WRITE_SYMBOL("pkmap_count_next", pkmap_count_next);
	WRITE_SYMBOL("system_utsname", system_utsname);
	WRITE_SYMBOL("init_uts_ns", init_uts_ns);
	WRITE_SYMBOL("_stext", _stext);
	WRITE_SYMBOL("swapper_pg_dir", swapper_pg_dir);
	WRITE_SYMBOL("init_level4_pgt", init_level4_pgt);
	WRITE_SYMBOL("level4_kernel_pgt", level4_kernel_pgt);
	WRITE_SYMBOL("init_top_pgt", init_top_pgt);
	WRITE_SYMBOL("vmlist", vmlist);
	WRITE_SYMBOL("vmap_area_list", vmap_area_list);
	WRITE_SYMBOL("node_online_map", node_online_map);
	WRITE_SYMBOL("node_states", node_states);
	WRITE_SYMBOL("node_data", node_data);
	WRITE_SYMBOL("pgdat_list", pgdat_list);
	WRITE_SYMBOL("contig_page_data", contig_page_data);
	WRITE_SYMBOL("log_buf", log_buf);
	WRITE_SYMBOL("log_buf_len", log_buf_len);
	WRITE_SYMBOL("log_end", log_end);
	WRITE_SYMBOL("log_first_idx", log_first_idx);
	WRITE_SYMBOL("clear_idx", clear_idx);
	WRITE_SYMBOL("log_next_idx", log_next_idx);
	WRITE_SYMBOL("max_pfn", max_pfn);
	WRITE_SYMBOL("high_memory", high_memory);
	WRITE_SYMBOL("node_remap_start_vaddr", node_remap_start_vaddr);
	WRITE_SYMBOL("node_remap_end_vaddr", node_remap_end_vaddr);
	WRITE_SYMBOL("node_remap_start_pfn", node_remap_start_pfn);
	WRITE_SYMBOL("vmemmap_list", vmemmap_list);
	WRITE_SYMBOL("mmu_psize_defs", mmu_psize_defs);
	WRITE_SYMBOL("mmu_vmemmap_psize", mmu_vmemmap_psize);
	WRITE_SYMBOL("cpu_pgd", cpu_pgd);
	WRITE_SYMBOL("demote_segment_4k", demote_segment_4k);
	WRITE_SYMBOL("cur_cpu_spec", cur_cpu_spec);
	WRITE_SYMBOL("free_huge_page", free_huge_page);

	/*
	 * write the structure size of 1st kernel
	 */
	WRITE_STRUCTURE_SIZE("page", page);
	WRITE_STRUCTURE_SIZE("mem_section", mem_section);
	WRITE_STRUCTURE_SIZE("pglist_data", pglist_data);
	WRITE_STRUCTURE_SIZE("zone", zone);
	WRITE_STRUCTURE_SIZE("free_area", free_area);
	WRITE_STRUCTURE_SIZE("list_head", list_head);
	WRITE_STRUCTURE_SIZE("node_memblk_s", node_memblk_s);
	WRITE_STRUCTURE_SIZE("nodemask_t", nodemask_t);
	WRITE_STRUCTURE_SIZE("pageflags", pageflags);
	/* Emit under whichever name this kernel actually uses
	 * (renamed "log" -> "printk_log" in 3.11-rc4). */
	if (info->flag_use_printk_log)
		WRITE_STRUCTURE_SIZE("printk_log", printk_log);
	else
		WRITE_STRUCTURE_SIZE("log", printk_log);
	WRITE_STRUCTURE_SIZE("vmemmap_backing", vmemmap_backing);
	WRITE_STRUCTURE_SIZE("mmu_psize_def", mmu_psize_def);

	/*
	 * write the member offset of 1st kernel
	 */
	WRITE_MEMBER_OFFSET("page.flags", page.flags);
	if (info->flag_use_count)
		WRITE_MEMBER_OFFSET("page._count", page._refcount);
	else
		WRITE_MEMBER_OFFSET("page._refcount", page._refcount);
	WRITE_MEMBER_OFFSET("page.mapping", page.mapping);
	WRITE_MEMBER_OFFSET("page.lru", page.lru);
	WRITE_MEMBER_OFFSET("page._mapcount", page._mapcount);
	WRITE_MEMBER_OFFSET("page.private", page.private);
	WRITE_MEMBER_OFFSET("page.compound_dtor", page.compound_dtor);
	WRITE_MEMBER_OFFSET("page.compound_order", page.compound_order);
	WRITE_MEMBER_OFFSET("page.compound_head", page.compound_head);
	WRITE_MEMBER_OFFSET("mem_section.section_mem_map",
	    mem_section.section_mem_map);
	WRITE_MEMBER_OFFSET("pglist_data.node_zones",
	    pglist_data.node_zones);
	WRITE_MEMBER_OFFSET("pglist_data.nr_zones", pglist_data.nr_zones);
	WRITE_MEMBER_OFFSET("pglist_data.node_mem_map",
	    pglist_data.node_mem_map);
	WRITE_MEMBER_OFFSET("pglist_data.node_start_pfn",
	    pglist_data.node_start_pfn);
	WRITE_MEMBER_OFFSET("pglist_data.node_spanned_pages",
	    pglist_data.node_spanned_pages);
	WRITE_MEMBER_OFFSET("pglist_data.pgdat_next", pglist_data.pgdat_next);
	WRITE_MEMBER_OFFSET("zone.free_pages", zone.free_pages);
	WRITE_MEMBER_OFFSET("zone.free_area", zone.free_area);
	WRITE_MEMBER_OFFSET("zone.vm_stat", zone.vm_stat);
	WRITE_MEMBER_OFFSET("zone.spanned_pages", zone.spanned_pages);
	WRITE_MEMBER_OFFSET("free_area.free_list", free_area.free_list);
	WRITE_MEMBER_OFFSET("list_head.next", list_head.next);
	WRITE_MEMBER_OFFSET("list_head.prev", list_head.prev);
	WRITE_MEMBER_OFFSET("node_memblk_s.start_paddr", node_memblk_s.start_paddr);
	WRITE_MEMBER_OFFSET("node_memblk_s.size", node_memblk_s.size);
	WRITE_MEMBER_OFFSET("node_memblk_s.nid", node_memblk_s.nid);
	WRITE_MEMBER_OFFSET("vm_struct.addr", vm_struct.addr);
	WRITE_MEMBER_OFFSET("vmap_area.va_start", vmap_area.va_start);
	WRITE_MEMBER_OFFSET("vmap_area.list", vmap_area.list);
	if (info->flag_use_printk_log) {
		WRITE_MEMBER_OFFSET("printk_log.ts_nsec", printk_log.ts_nsec);
		WRITE_MEMBER_OFFSET("printk_log.len", printk_log.len);
		WRITE_MEMBER_OFFSET("printk_log.text_len", printk_log.text_len);
	} else {
		/* Compatibility with pre-3.11-rc4 */
		WRITE_MEMBER_OFFSET("log.ts_nsec", printk_log.ts_nsec);
		WRITE_MEMBER_OFFSET("log.len", printk_log.len);
		WRITE_MEMBER_OFFSET("log.text_len", printk_log.text_len);
	}
	WRITE_MEMBER_OFFSET("vmemmap_backing.phys", vmemmap_backing.phys);
	WRITE_MEMBER_OFFSET("vmemmap_backing.virt_addr",
	    vmemmap_backing.virt_addr);
	WRITE_MEMBER_OFFSET("vmemmap_backing.list", vmemmap_backing.list);
	WRITE_MEMBER_OFFSET("mmu_psize_def.shift", mmu_psize_def.shift);
	WRITE_MEMBER_OFFSET("cpu_spec.mmu_features", cpu_spec.mmu_features);

	if (SYMBOL(node_data) != NOT_FOUND_SYMBOL)
		WRITE_ARRAY_LENGTH("node_data", node_data);
	if (SYMBOL(pgdat_list) != NOT_FOUND_SYMBOL)
		WRITE_ARRAY_LENGTH("pgdat_list", pgdat_list);
	if (SYMBOL(mem_section) != NOT_FOUND_SYMBOL)
		WRITE_ARRAY_LENGTH("mem_section", mem_section);
	if (SYMBOL(node_memblk) != NOT_FOUND_SYMBOL)
		WRITE_ARRAY_LENGTH("node_memblk", node_memblk);
	if (SYMBOL(node_remap_start_pfn) != NOT_FOUND_SYMBOL)
		WRITE_ARRAY_LENGTH("node_remap_start_pfn",
				node_remap_start_pfn);

	WRITE_ARRAY_LENGTH("zone.free_area", zone.free_area);
	WRITE_ARRAY_LENGTH("free_area.free_list", free_area.free_list);

	WRITE_NUMBER("NR_FREE_PAGES", NR_FREE_PAGES);
	WRITE_NUMBER("N_ONLINE", N_ONLINE);
	WRITE_NUMBER("pgtable_l5_enabled", pgtable_l5_enabled);

	WRITE_NUMBER("PG_lru", PG_lru);
	WRITE_NUMBER("PG_private", PG_private);
	WRITE_NUMBER("PG_head_mask", PG_head_mask);
	WRITE_NUMBER("PG_swapcache", PG_swapcache);
	WRITE_NUMBER("PG_swapbacked", PG_swapbacked);
	WRITE_NUMBER("PG_buddy", PG_buddy);
	WRITE_NUMBER("PG_slab", PG_slab);
	WRITE_NUMBER("PG_hwpoison", PG_hwpoison);

	WRITE_NUMBER("PAGE_BUDDY_MAPCOUNT_VALUE", PAGE_BUDDY_MAPCOUNT_VALUE);
	WRITE_NUMBER("phys_base", phys_base);

	WRITE_NUMBER("HUGETLB_PAGE_DTOR", HUGETLB_PAGE_DTOR);
#ifdef __aarch64__
	WRITE_NUMBER("VA_BITS", VA_BITS);
	WRITE_NUMBER_UNSIGNED("PHYS_OFFSET", PHYS_OFFSET);
	WRITE_NUMBER_UNSIGNED("kimage_voffset", kimage_voffset);
#endif

	if (info->phys_base)
		fprintf(info->file_vmcoreinfo, "%s%lu\n", STR_NUMBER("phys_base"),
		    info->phys_base);
	if (info->kaslr_offset)
		fprintf(info->file_vmcoreinfo, "%s%lx\n", STR_KERNELOFFSET,
		    info->kaslr_offset);

	/*
	 * write the source file of 1st kernel
	 */
	WRITE_SRCFILE("pud_t", pud_t);
}

/*
 * Top-level driver for -g: gather symbols/structures from vmlinux and
 * write the vmcoreinfo file.  Returns TRUE on success.
 */
int
generate_vmcoreinfo(void)
{
	if (!set_page_size(sysconf(_SC_PAGE_SIZE)))
		return FALSE;

	set_dwarf_debuginfo("vmlinux", NULL,
			
info->name_vmlinux, info->fd_vmlinux); + + if (!get_symbol_info()) + return FALSE; + + if (!get_structure_info()) + return FALSE; + + if (!get_srcfile_info()) + return FALSE; + + if ((SYMBOL(system_utsname) == NOT_FOUND_SYMBOL) + && (SYMBOL(init_uts_ns) == NOT_FOUND_SYMBOL)) { + ERRMSG("Can't get the symbol of system_utsname.\n"); + return FALSE; + } + if (!get_str_osrelease_from_vmlinux()) + return FALSE; + + if (!(info->kernel_version = get_kernel_version(info->release))) + return FALSE; + + if (get_mem_type() == NOT_FOUND_MEMTYPE) { + ERRMSG("Can't find the memory type.\n"); + return FALSE; + } + + write_vmcoreinfo_data(); + + return TRUE; +} + +int +read_vmcoreinfo_basic_info(void) +{ + time_t tv_sec = 0; + long page_size = FALSE; + char buf[BUFSIZE_FGETS], *endp; + unsigned int get_release = FALSE, i; + + if (fseek(info->file_vmcoreinfo, 0, SEEK_SET) < 0) { + ERRMSG("Can't seek the vmcoreinfo file(%s). %s\n", + info->name_vmcoreinfo, strerror(errno)); + return FALSE; + } + + while (fgets(buf, BUFSIZE_FGETS, info->file_vmcoreinfo)) { + i = strlen(buf); + if (!i) + break; + if (buf[i - 1] == '\n') + buf[i - 1] = '\0'; + if (strncmp(buf, STR_OSRELEASE, strlen(STR_OSRELEASE)) == 0) { + get_release = TRUE; + /* if the release have been stored, skip this time. 
*/ + if (strlen(info->release)) + continue; + strcpy(info->release, buf + strlen(STR_OSRELEASE)); + } + if (strncmp(buf, STR_PAGESIZE, strlen(STR_PAGESIZE)) == 0) { + page_size = strtol(buf+strlen(STR_PAGESIZE),&endp,10); + if ((!page_size || page_size == LONG_MAX) + || strlen(endp) != 0) { + ERRMSG("Invalid data in %s: %s", + info->name_vmcoreinfo, buf); + return FALSE; + } + if (!set_page_size(page_size)) { + ERRMSG("Invalid data in %s: %s", + info->name_vmcoreinfo, buf); + return FALSE; + } + } + if (strncmp(buf, STR_CRASHTIME, strlen(STR_CRASHTIME)) == 0) { + tv_sec = strtol(buf+strlen(STR_CRASHTIME),&endp,10); + if ((!tv_sec || tv_sec == LONG_MAX) + || strlen(endp) != 0) { + ERRMSG("Invalid data in %s: %s", + info->name_vmcoreinfo, buf); + return FALSE; + } + info->timestamp.tv_sec = tv_sec; + } + if (strncmp(buf, STR_CONFIG_X86_PAE, + strlen(STR_CONFIG_X86_PAE)) == 0) + vt.mem_flags |= MEMORY_X86_PAE; + + if (strncmp(buf, STR_CONFIG_PGTABLE_3, + strlen(STR_CONFIG_PGTABLE_3)) == 0) + vt.mem_flags |= MEMORY_PAGETABLE_3L; + + if (strncmp(buf, STR_CONFIG_PGTABLE_4, + strlen(STR_CONFIG_PGTABLE_4)) == 0) + vt.mem_flags |= MEMORY_PAGETABLE_4L; + } + if (!get_release || !info->page_size) { + ERRMSG("Invalid format in %s", info->name_vmcoreinfo); + return FALSE; + } + return TRUE; +} + +unsigned long +read_vmcoreinfo_symbol(char *str_symbol) +{ + unsigned long symbol = NOT_FOUND_SYMBOL; + char buf[BUFSIZE_FGETS], *endp; + unsigned int i; + + if (fseek(info->file_vmcoreinfo, 0, SEEK_SET) < 0) { + ERRMSG("Can't seek the vmcoreinfo file(%s). 
%s\n", + info->name_vmcoreinfo, strerror(errno)); + return INVALID_SYMBOL_DATA; + } + + while (fgets(buf, BUFSIZE_FGETS, info->file_vmcoreinfo)) { + i = strlen(buf); + if (!i) + break; + if (buf[i - 1] == '\n') + buf[i - 1] = '\0'; + if (strncmp(buf, str_symbol, strlen(str_symbol)) == 0) { + symbol = strtoul(buf + strlen(str_symbol), &endp, 16); + if ((!symbol || symbol == ULONG_MAX) + || strlen(endp) != 0) { + ERRMSG("Invalid data in %s: %s", + info->name_vmcoreinfo, buf); + return INVALID_SYMBOL_DATA; + } + break; + } + } + return symbol; +} + +unsigned long +read_vmcoreinfo_ulong(char *str_structure) +{ + long data = NOT_FOUND_LONG_VALUE; + char buf[BUFSIZE_FGETS], *endp; + unsigned int i; + + if (fseek(info->file_vmcoreinfo, 0, SEEK_SET) < 0) { + ERRMSG("Can't seek the vmcoreinfo file(%s). %s\n", + info->name_vmcoreinfo, strerror(errno)); + return INVALID_STRUCTURE_DATA; + } + + while (fgets(buf, BUFSIZE_FGETS, info->file_vmcoreinfo)) { + i = strlen(buf); + if (!i) + break; + if (buf[i - 1] == '\n') + buf[i - 1] = '\0'; + if (strncmp(buf, str_structure, strlen(str_structure)) == 0) { + data = strtoul(buf + strlen(str_structure), &endp, 10); + if (strlen(endp) != 0) + data = strtoul(buf + strlen(str_structure), &endp, 16); + if ((data == LONG_MAX) || strlen(endp) != 0) { + ERRMSG("Invalid data in %s: %s", + info->name_vmcoreinfo, buf); + return INVALID_STRUCTURE_DATA; + } + break; + } + } + return data; +} + +long +read_vmcoreinfo_long(char *str_structure) +{ + long data = NOT_FOUND_LONG_VALUE; + char buf[BUFSIZE_FGETS], *endp; + unsigned int i; + + if (fseek(info->file_vmcoreinfo, 0, SEEK_SET) < 0) { + ERRMSG("Can't seek the vmcoreinfo file(%s). 
%s\n", + info->name_vmcoreinfo, strerror(errno)); + return INVALID_STRUCTURE_DATA; + } + + while (fgets(buf, BUFSIZE_FGETS, info->file_vmcoreinfo)) { + i = strlen(buf); + if (!i) + break; + if (buf[i - 1] == '\n') + buf[i - 1] = '\0'; + if (strncmp(buf, str_structure, strlen(str_structure)) == 0) { + data = strtol(buf + strlen(str_structure), &endp, 10); + if (strlen(endp) != 0) + data = strtol(buf + strlen(str_structure), &endp, 16); + if ((data == LONG_MAX) || strlen(endp) != 0) { + ERRMSG("Invalid data in %s: %s", + info->name_vmcoreinfo, buf); + return INVALID_STRUCTURE_DATA; + } + break; + } + } + return data; +} + +int +read_vmcoreinfo_string(char *str_in, char *str_out) +{ + char buf[BUFSIZE_FGETS]; + unsigned int i; + + if (fseek(info->file_vmcoreinfo, 0, SEEK_SET) < 0) { + ERRMSG("Can't seek the vmcoreinfo file(%s). %s\n", + info->name_vmcoreinfo, strerror(errno)); + return FALSE; + } + + while (fgets(buf, BUFSIZE_FGETS, info->file_vmcoreinfo)) { + i = strlen(buf); + if (!i) + break; + if (buf[i - 1] == '\n') + buf[i - 1] = '\0'; + if (strncmp(buf, str_in, strlen(str_in)) == 0) { + strncpy(str_out, buf + strlen(str_in), LEN_SRCFILE - strlen(str_in)); + break; + } + } + return TRUE; +} + +int +read_vmcoreinfo(void) +{ + if (!read_vmcoreinfo_basic_info()) + return FALSE; + + READ_SYMBOL("mem_map", mem_map); + READ_SYMBOL("vmem_map", vmem_map); + READ_SYMBOL("mem_section", mem_section); + READ_SYMBOL("pkmap_count", pkmap_count); + READ_SYMBOL("pkmap_count_next", pkmap_count_next); + READ_SYMBOL("system_utsname", system_utsname); + READ_SYMBOL("init_uts_ns", init_uts_ns); + READ_SYMBOL("_stext", _stext); + READ_SYMBOL("swapper_pg_dir", swapper_pg_dir); + READ_SYMBOL("init_level4_pgt", init_level4_pgt); + READ_SYMBOL("level4_kernel_pgt", level4_kernel_pgt); + READ_SYMBOL("init_top_pgt", init_top_pgt); + READ_SYMBOL("vmlist", vmlist); + READ_SYMBOL("vmap_area_list", vmap_area_list); + READ_SYMBOL("node_online_map", node_online_map); + READ_SYMBOL("node_states", 
node_states); + READ_SYMBOL("node_data", node_data); + READ_SYMBOL("pgdat_list", pgdat_list); + READ_SYMBOL("contig_page_data", contig_page_data); + READ_SYMBOL("log_buf", log_buf); + READ_SYMBOL("log_buf_len", log_buf_len); + READ_SYMBOL("log_end", log_end); + READ_SYMBOL("log_first_idx", log_first_idx); + READ_SYMBOL("clear_idx", clear_idx); + READ_SYMBOL("log_next_idx", log_next_idx); + READ_SYMBOL("max_pfn", max_pfn); + READ_SYMBOL("high_memory", high_memory); + READ_SYMBOL("node_remap_start_vaddr", node_remap_start_vaddr); + READ_SYMBOL("node_remap_end_vaddr", node_remap_end_vaddr); + READ_SYMBOL("node_remap_start_pfn", node_remap_start_pfn); + READ_SYMBOL("vmemmap_list", vmemmap_list); + READ_SYMBOL("mmu_psize_defs", mmu_psize_defs); + READ_SYMBOL("mmu_vmemmap_psize", mmu_vmemmap_psize); + READ_SYMBOL("cpu_pgd", cpu_pgd); + READ_SYMBOL("demote_segment_4k", demote_segment_4k); + READ_SYMBOL("cur_cpu_spec", cur_cpu_spec); + READ_SYMBOL("free_huge_page", free_huge_page); + + READ_STRUCTURE_SIZE("page", page); + READ_STRUCTURE_SIZE("mem_section", mem_section); + READ_STRUCTURE_SIZE("pglist_data", pglist_data); + READ_STRUCTURE_SIZE("zone", zone); + READ_STRUCTURE_SIZE("free_area", free_area); + READ_STRUCTURE_SIZE("list_head", list_head); + READ_STRUCTURE_SIZE("node_memblk_s", node_memblk_s); + READ_STRUCTURE_SIZE("nodemask_t", nodemask_t); + READ_STRUCTURE_SIZE("pageflags", pageflags); + READ_STRUCTURE_SIZE("vmemmap_backing", vmemmap_backing); + READ_STRUCTURE_SIZE("mmu_psize_def", mmu_psize_def); + + + READ_MEMBER_OFFSET("page.flags", page.flags); + READ_MEMBER_OFFSET("page._refcount", page._refcount); + if (OFFSET(page._refcount) == NOT_FOUND_STRUCTURE) { + info->flag_use_count = TRUE; + READ_MEMBER_OFFSET("page._count", page._refcount); + } else { + info->flag_use_count = FALSE; + } + READ_MEMBER_OFFSET("page.mapping", page.mapping); + READ_MEMBER_OFFSET("page.lru", page.lru); + READ_MEMBER_OFFSET("page._mapcount", page._mapcount); + 
READ_MEMBER_OFFSET("page.private", page.private); + READ_MEMBER_OFFSET("page.compound_dtor", page.compound_dtor); + READ_MEMBER_OFFSET("page.compound_order", page.compound_order); + READ_MEMBER_OFFSET("page.compound_head", page.compound_head); + READ_MEMBER_OFFSET("mem_section.section_mem_map", + mem_section.section_mem_map); + READ_MEMBER_OFFSET("pglist_data.node_zones", pglist_data.node_zones); + READ_MEMBER_OFFSET("pglist_data.nr_zones", pglist_data.nr_zones); + READ_MEMBER_OFFSET("pglist_data.node_mem_map",pglist_data.node_mem_map); + READ_MEMBER_OFFSET("pglist_data.node_start_pfn", + pglist_data.node_start_pfn); + READ_MEMBER_OFFSET("pglist_data.node_spanned_pages", + pglist_data.node_spanned_pages); + READ_MEMBER_OFFSET("pglist_data.pgdat_next", pglist_data.pgdat_next); + READ_MEMBER_OFFSET("zone.free_pages", zone.free_pages); + READ_MEMBER_OFFSET("zone.free_area", zone.free_area); + READ_MEMBER_OFFSET("zone.vm_stat", zone.vm_stat); + READ_MEMBER_OFFSET("zone.spanned_pages", zone.spanned_pages); + READ_MEMBER_OFFSET("free_area.free_list", free_area.free_list); + READ_MEMBER_OFFSET("list_head.next", list_head.next); + READ_MEMBER_OFFSET("list_head.prev", list_head.prev); + READ_MEMBER_OFFSET("node_memblk_s.start_paddr", node_memblk_s.start_paddr); + READ_MEMBER_OFFSET("node_memblk_s.size", node_memblk_s.size); + READ_MEMBER_OFFSET("node_memblk_s.nid", node_memblk_s.nid); + READ_MEMBER_OFFSET("vm_struct.addr", vm_struct.addr); + READ_MEMBER_OFFSET("vmap_area.va_start", vmap_area.va_start); + READ_MEMBER_OFFSET("vmap_area.list", vmap_area.list); + READ_MEMBER_OFFSET("vmemmap_backing.phys", vmemmap_backing.phys); + READ_MEMBER_OFFSET("vmemmap_backing.virt_addr", + vmemmap_backing.virt_addr); + READ_MEMBER_OFFSET("vmemmap_backing.list", vmemmap_backing.list); + READ_MEMBER_OFFSET("mmu_psize_def.shift", mmu_psize_def.shift); + READ_MEMBER_OFFSET("cpu_spec.mmu_features", cpu_spec.mmu_features); + + READ_STRUCTURE_SIZE("printk_log", printk_log); + if 
(SIZE(printk_log) != NOT_FOUND_STRUCTURE) { + info->flag_use_printk_log = TRUE; + READ_MEMBER_OFFSET("printk_log.ts_nsec", printk_log.ts_nsec); + READ_MEMBER_OFFSET("printk_log.len", printk_log.len); + READ_MEMBER_OFFSET("printk_log.text_len", printk_log.text_len); + } else { + info->flag_use_printk_log = FALSE; + READ_STRUCTURE_SIZE("log", printk_log); + READ_MEMBER_OFFSET("log.ts_nsec", printk_log.ts_nsec); + READ_MEMBER_OFFSET("log.len", printk_log.len); + READ_MEMBER_OFFSET("log.text_len", printk_log.text_len); + } + + READ_ARRAY_LENGTH("node_data", node_data); + READ_ARRAY_LENGTH("pgdat_list", pgdat_list); + READ_ARRAY_LENGTH("mem_section", mem_section); + READ_ARRAY_LENGTH("node_memblk", node_memblk); + READ_ARRAY_LENGTH("zone.free_area", zone.free_area); + READ_ARRAY_LENGTH("free_area.free_list", free_area.free_list); + READ_ARRAY_LENGTH("node_remap_start_pfn", node_remap_start_pfn); + + READ_NUMBER("NR_FREE_PAGES", NR_FREE_PAGES); + READ_NUMBER("N_ONLINE", N_ONLINE); + READ_NUMBER("pgtable_l5_enabled", pgtable_l5_enabled); + + READ_NUMBER("PG_lru", PG_lru); + READ_NUMBER("PG_private", PG_private); + READ_NUMBER("PG_head_mask", PG_head_mask); + READ_NUMBER("PG_swapcache", PG_swapcache); + READ_NUMBER("PG_swapbacked", PG_swapbacked); + READ_NUMBER("PG_slab", PG_slab); + READ_NUMBER("PG_buddy", PG_buddy); + READ_NUMBER("PG_hwpoison", PG_hwpoison); + READ_NUMBER("SECTION_SIZE_BITS", SECTION_SIZE_BITS); + READ_NUMBER("MAX_PHYSMEM_BITS", MAX_PHYSMEM_BITS); + + READ_SRCFILE("pud_t", pud_t); + + READ_NUMBER("PAGE_BUDDY_MAPCOUNT_VALUE", PAGE_BUDDY_MAPCOUNT_VALUE); + READ_NUMBER("phys_base", phys_base); +#ifdef __aarch64__ + READ_NUMBER("VA_BITS", VA_BITS); + READ_NUMBER_UNSIGNED("PHYS_OFFSET", PHYS_OFFSET); + READ_NUMBER_UNSIGNED("kimage_voffset", kimage_voffset); +#endif + + READ_NUMBER("HUGETLB_PAGE_DTOR", HUGETLB_PAGE_DTOR); + + return TRUE; +} + +/* + * Extract vmcoreinfo from /proc/vmcore and output it to /tmp/vmcoreinfo.tmp. 
+ */ +int +copy_vmcoreinfo(off_t offset, unsigned long size) +{ + int fd; + char buf[VMCOREINFO_BYTES]; + const off_t failed = (off_t)-1; + + if (!offset || !size) + return FALSE; + + if ((fd = mkstemp(info->name_vmcoreinfo)) < 0) { + ERRMSG("Can't open the vmcoreinfo file(%s). %s\n", + info->name_vmcoreinfo, strerror(errno)); + return FALSE; + } + if (lseek(info->fd_memory, offset, SEEK_SET) == failed) { + ERRMSG("Can't seek the dump memory(%s). %s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + if (read(info->fd_memory, &buf, size) != size) { + ERRMSG("Can't read the dump memory(%s). %s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + if (write(fd, &buf, size) != size) { + ERRMSG("Can't write the vmcoreinfo file(%s). %s\n", + info->name_vmcoreinfo, strerror(errno)); + return FALSE; + } + if (close(fd) < 0) { + ERRMSG("Can't close the vmcoreinfo file(%s). %s\n", + info->name_vmcoreinfo, strerror(errno)); + return FALSE; + } + return TRUE; +} + +int +read_vmcoreinfo_from_vmcore(off_t offset, unsigned long size, int flag_xen_hv) +{ + int ret = FALSE; + + /* + * Copy vmcoreinfo to /tmp/vmcoreinfoXXXXXX. + */ + if (!(info->name_vmcoreinfo = strdup(FILENAME_VMCOREINFO))) { + MSG("Can't duplicate strings(%s).\n", FILENAME_VMCOREINFO); + return FALSE; + } + if (!copy_vmcoreinfo(offset, size)) + goto out; + + /* + * Read vmcoreinfo from /tmp/vmcoreinfoXXXXXX. + */ + if (!open_vmcoreinfo("r")) + goto out; + + unlink(info->name_vmcoreinfo); + + if (flag_xen_hv) { + if (!read_vmcoreinfo_xen()) + goto out; + } else { + if (!read_vmcoreinfo()) + goto out; + } + close_vmcoreinfo(); + + ret = TRUE; +out: + free(info->name_vmcoreinfo); + info->name_vmcoreinfo = NULL; + + return ret; +} + +/* + * Get the number of online nodes. 
+ */ +int +get_nodes_online(void) +{ + int len, i, j, online; + unsigned long node_online_map = 0, bitbuf, *maskptr; + + if ((SYMBOL(node_online_map) == NOT_FOUND_SYMBOL) + && (SYMBOL(node_states) == NOT_FOUND_SYMBOL)) + return 0; + + if (SIZE(nodemask_t) == NOT_FOUND_STRUCTURE) { + ERRMSG("Can't get the size of nodemask_t.\n"); + return 0; + } + + len = SIZE(nodemask_t); + vt.node_online_map_len = len/sizeof(unsigned long); + if (!(vt.node_online_map = (unsigned long *)malloc(len))) { + ERRMSG("Can't allocate memory for the node online map. %s\n", + strerror(errno)); + return 0; + } + if (SYMBOL(node_online_map) != NOT_FOUND_SYMBOL) { + node_online_map = SYMBOL(node_online_map); + } else if (SYMBOL(node_states) != NOT_FOUND_SYMBOL) { + /* + * For linux-2.6.23-rc4-mm1 + */ + node_online_map = SYMBOL(node_states) + + (SIZE(nodemask_t) * NUMBER(N_ONLINE)); + } + if (!readmem(VADDR, node_online_map, vt.node_online_map, len)){ + ERRMSG("Can't get the node online map.\n"); + return 0; + } + online = 0; + maskptr = (unsigned long *)vt.node_online_map; + for (i = 0; i < vt.node_online_map_len; i++, maskptr++) { + bitbuf = *maskptr; + for (j = 0; j < sizeof(bitbuf) * 8; j++) { + online += bitbuf & 1; + bitbuf = bitbuf >> 1; + } + } + return online; +} + +int +get_numnodes(void) +{ + if (!(vt.numnodes = get_nodes_online())) { + vt.numnodes = 1; + } + DEBUG_MSG("\n"); + DEBUG_MSG("num of NODEs : %d\n", vt.numnodes); + DEBUG_MSG("\n"); + + return TRUE; +} + +int +next_online_node(int first) +{ + int i, j, node; + unsigned long mask, *maskptr; + + /* It cannot occur */ + if ((first/(sizeof(unsigned long) * 8)) >= vt.node_online_map_len) { + ERRMSG("next_online_node: %d is too large!\n", first); + return -1; + } + + maskptr = (unsigned long *)vt.node_online_map; + for (i = node = 0; i < vt.node_online_map_len; i++, maskptr++) { + mask = *maskptr; + for (j = 0; j < (sizeof(unsigned long) * 8); j++, node++) { + if (mask & 1) { + if (node >= first) + return node; + } + mask >>= 1; 
+ } + } + return -1; +} + +unsigned long +next_online_pgdat(int node) +{ + int i; + unsigned long pgdat; + + /* + * Get the pglist_data structure from symbol "node_data". + * The array number of symbol "node_data" cannot be gotten + * from vmlinux. Instead, check it is DW_TAG_array_type. + */ + if ((SYMBOL(node_data) == NOT_FOUND_SYMBOL) + || (ARRAY_LENGTH(node_data) == NOT_FOUND_STRUCTURE)) + goto pgdat2; + + if (!readmem(VADDR, SYMBOL(node_data) + (node * sizeof(void *)), + &pgdat, sizeof pgdat)) + goto pgdat2; + + if (!is_kvaddr(pgdat)) + goto pgdat2; + + return pgdat; + +pgdat2: + /* + * Get the pglist_data structure from symbol "pgdat_list". + */ + if (SYMBOL(pgdat_list) == NOT_FOUND_SYMBOL) + goto pgdat3; + + else if ((0 < node) + && (ARRAY_LENGTH(pgdat_list) == NOT_FOUND_STRUCTURE)) + goto pgdat3; + + else if ((ARRAY_LENGTH(pgdat_list) != NOT_FOUND_STRUCTURE) + && (ARRAY_LENGTH(pgdat_list) < node)) + goto pgdat3; + + if (!readmem(VADDR, SYMBOL(pgdat_list) + (node * sizeof(void *)), + &pgdat, sizeof pgdat)) + goto pgdat3; + + if (!is_kvaddr(pgdat)) + goto pgdat3; + + return pgdat; + +pgdat3: + /* + * linux-2.6.16 or former + */ + if ((SYMBOL(pgdat_list) == NOT_FOUND_SYMBOL) + || (OFFSET(pglist_data.pgdat_next) == NOT_FOUND_STRUCTURE)) + goto pgdat4; + + if (!readmem(VADDR, SYMBOL(pgdat_list), &pgdat, sizeof pgdat)) + goto pgdat4; + + if (!is_kvaddr(pgdat)) + goto pgdat4; + + if (node == 0) + return pgdat; + + for (i = 1; i <= node; i++) { + if (!readmem(VADDR, pgdat+OFFSET(pglist_data.pgdat_next), + &pgdat, sizeof pgdat)) + goto pgdat4; + + if (!is_kvaddr(pgdat)) + goto pgdat4; + } + return pgdat; + +pgdat4: + /* + * Get the pglist_data structure from symbol "contig_page_data". 
+ */ + if (SYMBOL(contig_page_data) == NOT_FOUND_SYMBOL) + return FALSE; + + if (node != 0) + return FALSE; + + return SYMBOL(contig_page_data); +} + +void +dump_mem_map(mdf_pfn_t pfn_start, mdf_pfn_t pfn_end, + unsigned long mem_map, int num_mm) +{ + struct mem_map_data *mmd; + + mmd = &info->mem_map_data[num_mm]; + mmd->pfn_start = pfn_start; + mmd->pfn_end = pfn_end; + mmd->mem_map = mem_map; + + DEBUG_MSG("mem_map (%d)\n", num_mm); + DEBUG_MSG(" mem_map : %lx\n", mem_map); + DEBUG_MSG(" pfn_start : %llx\n", pfn_start); + DEBUG_MSG(" pfn_end : %llx\n", pfn_end); + + return; +} + +int +get_mm_flatmem(void) +{ + unsigned long mem_map; + + /* + * Get the address of the symbol "mem_map". + */ + if (!readmem(VADDR, SYMBOL(mem_map), &mem_map, sizeof mem_map) + || !mem_map) { + ERRMSG("Can't get the address of mem_map.\n"); + return FALSE; + } + info->num_mem_map = 1; + if ((info->mem_map_data = (struct mem_map_data *) + malloc(sizeof(struct mem_map_data)*info->num_mem_map)) == NULL) { + ERRMSG("Can't allocate memory for the mem_map_data. 
%s\n", + strerror(errno)); + return FALSE; + } + if (is_xen_memory()) + dump_mem_map(0, info->dom0_mapnr, mem_map, 0); + else + dump_mem_map(0, info->max_mapnr, mem_map, 0); + + return TRUE; +} + +int +get_node_memblk(int num_memblk, + unsigned long *start_paddr, unsigned long *size, int *nid) +{ + unsigned long node_memblk; + + if (ARRAY_LENGTH(node_memblk) <= num_memblk) { + ERRMSG("Invalid num_memblk.\n"); + return FALSE; + } + node_memblk = SYMBOL(node_memblk) + SIZE(node_memblk_s) * num_memblk; + if (!readmem(VADDR, node_memblk+OFFSET(node_memblk_s.start_paddr), + start_paddr, sizeof(unsigned long))) { + ERRMSG("Can't get node_memblk_s.start_paddr.\n"); + return FALSE; + } + if (!readmem(VADDR, node_memblk + OFFSET(node_memblk_s.size), + size, sizeof(unsigned long))) { + ERRMSG("Can't get node_memblk_s.size.\n"); + return FALSE; + } + if (!readmem(VADDR, node_memblk + OFFSET(node_memblk_s.nid), + nid, sizeof(int))) { + ERRMSG("Can't get node_memblk_s.nid.\n"); + return FALSE; + } + return TRUE; +} + +int +get_num_mm_discontigmem(void) +{ + int i, nid; + unsigned long start_paddr, size; + + if ((SYMBOL(node_memblk) == NOT_FOUND_SYMBOL) + || (ARRAY_LENGTH(node_memblk) == NOT_FOUND_STRUCTURE) + || (SIZE(node_memblk_s) == NOT_FOUND_STRUCTURE) + || (OFFSET(node_memblk_s.start_paddr) == NOT_FOUND_STRUCTURE) + || (OFFSET(node_memblk_s.size) == NOT_FOUND_STRUCTURE) + || (OFFSET(node_memblk_s.nid) == NOT_FOUND_STRUCTURE)) { + return vt.numnodes; + } else { + for (i = 0; i < ARRAY_LENGTH(node_memblk); i++) { + if (!get_node_memblk(i, &start_paddr, &size, &nid)) { + ERRMSG("Can't get the node_memblk (%d)\n", i); + return 0; + } + if (!start_paddr && !size &&!nid) + break; + + DEBUG_MSG("nid : %d\n", nid); + DEBUG_MSG(" start_paddr: %lx\n", start_paddr); + DEBUG_MSG(" size : %lx\n", size); + } + if (i == 0) { + /* + * On non-NUMA systems, node_memblk_s is not set. 
+ */ + return vt.numnodes; + } else { + return i; + } + } +} + +int +separate_mem_map(struct mem_map_data *mmd, int *id_mm, int nid_pgdat, + unsigned long mem_map_pgdat, unsigned long pfn_start_pgdat) +{ + int i, nid; + unsigned long start_paddr, size, pfn_start, pfn_end, mem_map; + + for (i = 0; i < ARRAY_LENGTH(node_memblk); i++) { + if (!get_node_memblk(i, &start_paddr, &size, &nid)) { + ERRMSG("Can't get the node_memblk (%d)\n", i); + return FALSE; + } + if (!start_paddr && !size && !nid) + break; + + /* + * Check pglist_data.node_id and node_memblk_s.nid match. + */ + if (nid_pgdat != nid) + continue; + + pfn_start = paddr_to_pfn(start_paddr); + pfn_end = paddr_to_pfn(start_paddr + size); + + if (pfn_start < pfn_start_pgdat) { + ERRMSG("node_memblk_s.start_paddr of node (%d) is invalid.\n", nid); + return FALSE; + } + if (info->max_mapnr < pfn_end) { + DEBUG_MSG("pfn_end of node (%d) is over max_mapnr.\n", + nid); + DEBUG_MSG(" pfn_start: %lx\n", pfn_start); + DEBUG_MSG(" pfn_end : %lx\n", pfn_end); + DEBUG_MSG(" max_mapnr: %llx\n", info->max_mapnr); + + pfn_end = info->max_mapnr; + } + + mem_map = mem_map_pgdat+SIZE(page)*(pfn_start-pfn_start_pgdat); + + mmd->pfn_start = pfn_start; + mmd->pfn_end = pfn_end; + mmd->mem_map = mem_map; + + mmd++; + (*id_mm)++; + } + return TRUE; +} + +int +get_mm_discontigmem(void) +{ + int i, j, id_mm, node, num_mem_map, separate_mm = FALSE; + unsigned long pgdat, mem_map, pfn_start, pfn_end, node_spanned_pages; + unsigned long vmem_map; + struct mem_map_data temp_mmd; + + num_mem_map = get_num_mm_discontigmem(); + if (num_mem_map < vt.numnodes) { + ERRMSG("Can't get the number of mem_map.\n"); + return FALSE; + } + struct mem_map_data mmd[num_mem_map]; + if (vt.numnodes < num_mem_map) { + separate_mm = TRUE; + } + + /* + * Note: + * This note is only for ia64 discontigmem kernel. 
+ * It is better to take mem_map information from a symbol vmem_map + * instead of pglist_data.node_mem_map, because some node_mem_map + * sometimes does not have mem_map information corresponding to its + * node_start_pfn. + */ + if (SYMBOL(vmem_map) != NOT_FOUND_SYMBOL) { + if (!readmem(VADDR, SYMBOL(vmem_map), &vmem_map, sizeof vmem_map)) { + ERRMSG("Can't get vmem_map.\n"); + return FALSE; + } + } + + /* + * Get the first node_id. + */ + if ((node = next_online_node(0)) < 0) { + ERRMSG("Can't get next online node.\n"); + return FALSE; + } + if (!(pgdat = next_online_pgdat(node))) { + ERRMSG("Can't get pgdat list.\n"); + return FALSE; + } + id_mm = 0; + for (i = 0; i < vt.numnodes; i++) { + if (!readmem(VADDR, pgdat + OFFSET(pglist_data.node_start_pfn), + &pfn_start, sizeof pfn_start)) { + ERRMSG("Can't get node_start_pfn.\n"); + return FALSE; + } + if (!readmem(VADDR,pgdat+OFFSET(pglist_data.node_spanned_pages), + &node_spanned_pages, sizeof node_spanned_pages)) { + ERRMSG("Can't get node_spanned_pages.\n"); + return FALSE; + } + pfn_end = pfn_start + node_spanned_pages; + + if (SYMBOL(vmem_map) == NOT_FOUND_SYMBOL) { + if (!readmem(VADDR, pgdat + OFFSET(pglist_data.node_mem_map), + &mem_map, sizeof mem_map)) { + ERRMSG("Can't get mem_map.\n"); + return FALSE; + } + } else + mem_map = vmem_map + (SIZE(page) * pfn_start); + + if (separate_mm) { + /* + * For some ia64 NUMA systems. + * On some systems, a node has the separated memory. + * And pglist_data(s) have the duplicated memory range + * like following: + * + * Nid: Physical address + * 0 : 0x1000000000 - 0x2000000000 + * 1 : 0x2000000000 - 0x3000000000 + * 2 : 0x0000000000 - 0x6020000000 <- Overlapping + * 3 : 0x3000000000 - 0x4000000000 + * 4 : 0x4000000000 - 0x5000000000 + * 5 : 0x5000000000 - 0x6000000000 + * + * Then, mem_map(s) should be separated by + * node_memblk_s info. 
+ */ + if (!separate_mem_map(&mmd[id_mm], &id_mm, node, + mem_map, pfn_start)) { + ERRMSG("Can't separate mem_map.\n"); + return FALSE; + } + } else { + if (info->max_mapnr < pfn_end) { + DEBUG_MSG("pfn_end of node (%d) is over max_mapnr.\n", + node); + DEBUG_MSG(" pfn_start: %lx\n", pfn_start); + DEBUG_MSG(" pfn_end : %lx\n", pfn_end); + DEBUG_MSG(" max_mapnr: %llx\n", info->max_mapnr); + + pfn_end = info->max_mapnr; + } + + /* + * The number of mem_map is the same as the number + * of nodes. + */ + mmd[id_mm].pfn_start = pfn_start; + mmd[id_mm].pfn_end = pfn_end; + mmd[id_mm].mem_map = mem_map; + id_mm++; + } + + /* + * Get pglist_data of the next node. + */ + if (i < (vt.numnodes - 1)) { + if ((node = next_online_node(node + 1)) < 0) { + ERRMSG("Can't get next online node.\n"); + return FALSE; + } else if (!(pgdat = next_online_pgdat(node))) { + ERRMSG("Can't determine pgdat list (node %d).\n", + node); + return FALSE; + } + } + } + + /* + * Sort mem_map by pfn_start. + */ + for (i = 0; i < (num_mem_map - 1); i++) { + for (j = i + 1; j < num_mem_map; j++) { + if (mmd[j].pfn_start < mmd[i].pfn_start) { + temp_mmd = mmd[j]; + mmd[j] = mmd[i]; + mmd[i] = temp_mmd; + } + } + } + + /* + * Calculate the number of mem_map. 
+ */ + info->num_mem_map = num_mem_map; + if (mmd[0].pfn_start != 0) + info->num_mem_map++; + + for (i = 0; i < num_mem_map - 1; i++) { + if (mmd[i].pfn_end > mmd[i + 1].pfn_start) { + ERRMSG("The mem_map is overlapped with the next one.\n"); + ERRMSG("mmd[%d].pfn_end = %llx\n", i, mmd[i].pfn_end); + ERRMSG("mmd[%d].pfn_start = %llx\n", i + 1, mmd[i + 1].pfn_start); + return FALSE; + } else if (mmd[i].pfn_end == mmd[i + 1].pfn_start) + /* + * Continuous mem_map + */ + continue; + + /* + * Discontinuous mem_map + */ + info->num_mem_map++; + } + if (mmd[num_mem_map - 1].pfn_end < info->max_mapnr) + info->num_mem_map++; + + if ((info->mem_map_data = (struct mem_map_data *) + malloc(sizeof(struct mem_map_data)*info->num_mem_map)) == NULL) { + ERRMSG("Can't allocate memory for the mem_map_data. %s\n", + strerror(errno)); + return FALSE; + } + + /* + * Create mem_map data. + */ + id_mm = 0; + if (mmd[0].pfn_start != 0) { + dump_mem_map(0, mmd[0].pfn_start, NOT_MEMMAP_ADDR, id_mm); + id_mm++; + } + for (i = 0; i < num_mem_map; i++) { + dump_mem_map(mmd[i].pfn_start, mmd[i].pfn_end, + mmd[i].mem_map, id_mm); + id_mm++; + if ((i < num_mem_map - 1) + && (mmd[i].pfn_end != mmd[i + 1].pfn_start)) { + dump_mem_map(mmd[i].pfn_end, mmd[i +1].pfn_start, + NOT_MEMMAP_ADDR, id_mm); + id_mm++; + } + } + i = num_mem_map - 1; + if (is_xen_memory()) { + if (mmd[i].pfn_end < info->dom0_mapnr) + dump_mem_map(mmd[i].pfn_end, info->dom0_mapnr, + NOT_MEMMAP_ADDR, id_mm); + } else { + if (mmd[i].pfn_end < info->max_mapnr) + dump_mem_map(mmd[i].pfn_end, info->max_mapnr, + NOT_MEMMAP_ADDR, id_mm); + } + return TRUE; +} + +static unsigned long +nr_to_section(unsigned long nr, unsigned long *mem_sec) +{ + unsigned long addr; + + if (is_sparsemem_extreme()) { + if (mem_sec[SECTION_NR_TO_ROOT(nr)] == 0) + return NOT_KV_ADDR; + addr = mem_sec[SECTION_NR_TO_ROOT(nr)] + + (nr & SECTION_ROOT_MASK()) * SIZE(mem_section); + } else { + addr = SYMBOL(mem_section) + (nr * SIZE(mem_section)); + } + + return 
addr; +} + +static unsigned long +section_mem_map_addr(unsigned long addr, unsigned long *map_mask) +{ + char *mem_section; + unsigned long map; + unsigned long mask; + + *map_mask = 0; + + if (!is_kvaddr(addr)) + return NOT_KV_ADDR; + + if ((mem_section = malloc(SIZE(mem_section))) == NULL) { + ERRMSG("Can't allocate memory for a struct mem_section. %s\n", + strerror(errno)); + return NOT_KV_ADDR; + } + if (!readmem(VADDR, addr, mem_section, SIZE(mem_section))) { + ERRMSG("Can't get a struct mem_section(%lx).\n", addr); + free(mem_section); + return NOT_KV_ADDR; + } + map = ULONG(mem_section + OFFSET(mem_section.section_mem_map)); + mask = SECTION_MAP_MASK; + *map_mask = map & ~mask; + if (map == 0x0) + *map_mask |= SECTION_MARKED_PRESENT; + map &= mask; + free(mem_section); + + return map; +} + +static unsigned long +sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long section_nr) +{ + unsigned long mem_map; + + mem_map = coded_mem_map + + (SECTION_NR_TO_PFN(section_nr) * SIZE(page)); + + return mem_map; +} + +/* + * On some kernels, mem_section may be a pointer or an array, when + * SPARSEMEM_EXTREME is on. + * + * We assume that section_mem_map is either 0 or has the present bit set. 
+ * + */ + +static int +validate_mem_section(unsigned long *mem_sec, + unsigned long mem_section_ptr, unsigned int mem_section_size, + unsigned long *mem_maps, unsigned int num_section) +{ + unsigned int section_nr; + unsigned long map_mask; + unsigned long section, mem_map; + int ret = FALSE; + + if (!readmem(VADDR, mem_section_ptr, mem_sec, mem_section_size)) { + ERRMSG("Can't read mem_section array.\n"); + return FALSE; + } + for (section_nr = 0; section_nr < num_section; section_nr++) { + section = nr_to_section(section_nr, mem_sec); + if (section == NOT_KV_ADDR) { + mem_map = NOT_MEMMAP_ADDR; + } else { + mem_map = section_mem_map_addr(section, &map_mask); + if (!(map_mask & SECTION_MARKED_PRESENT)) { + return FALSE; + } + if (mem_map == 0) { + mem_map = NOT_MEMMAP_ADDR; + } else { + mem_map = sparse_decode_mem_map(mem_map, + section_nr); + if (!is_kvaddr(mem_map)) { + return FALSE; + } + ret = TRUE; + } + } + mem_maps[section_nr] = mem_map; + } + return ret; +} + +static int +get_mem_section(unsigned int mem_section_size, unsigned long *mem_maps, + unsigned int num_section) +{ + unsigned long mem_section_ptr; + int ret = FALSE; + unsigned long *mem_sec = NULL; + + if ((mem_sec = malloc(mem_section_size)) == NULL) { + ERRMSG("Can't allocate memory for the mem_section. %s\n", + strerror(errno)); + return FALSE; + } + ret = validate_mem_section(mem_sec, SYMBOL(mem_section), + mem_section_size, mem_maps, num_section); + + if (is_sparsemem_extreme()) { + int symbol_valid = ret; + int pointer_valid; + int mem_maps_size = sizeof(*mem_maps) * num_section; + unsigned long *mem_maps_ex = NULL; + if (!readmem(VADDR, SYMBOL(mem_section), &mem_section_ptr, + sizeof(mem_section_ptr))) + goto out; + + if ((mem_maps_ex = malloc(mem_maps_size)) == NULL) { + ERRMSG("Can't allocate memory for the mem_maps. 
%s\n", + strerror(errno)); + goto out; + } + + pointer_valid = validate_mem_section(mem_sec, + mem_section_ptr, + mem_section_size, + mem_maps_ex, + num_section); + if (pointer_valid) + memcpy(mem_maps, mem_maps_ex, mem_maps_size); + if (mem_maps_ex) + free(mem_maps_ex); + ret = symbol_valid ^ pointer_valid; + if (!ret) { + ERRMSG("Could not validate mem_section.\n"); + } + } +out: + if (mem_sec != NULL) + free(mem_sec); + return ret; +} + +int +get_mm_sparsemem(void) +{ + unsigned int section_nr, mem_section_size, num_section; + mdf_pfn_t pfn_start, pfn_end; + unsigned long *mem_maps = NULL; + + int ret = FALSE; + + /* + * Get the address of the symbol "mem_section". + */ + num_section = divideup(info->max_mapnr, PAGES_PER_SECTION()); + if (is_sparsemem_extreme()) { + info->sections_per_root = _SECTIONS_PER_ROOT_EXTREME(); + mem_section_size = sizeof(void *) * NR_SECTION_ROOTS(); + } else { + info->sections_per_root = _SECTIONS_PER_ROOT(); + mem_section_size = SIZE(mem_section) * NR_SECTION_ROOTS(); + } + if ((mem_maps = malloc(sizeof(*mem_maps) * num_section)) == NULL) { + ERRMSG("Can't allocate memory for the mem_maps. %s\n", + strerror(errno)); + return FALSE; + } + if (!get_mem_section(mem_section_size, mem_maps, num_section)) { + ERRMSG("Can't get the address of mem_section.\n"); + goto out; + } + info->num_mem_map = num_section; + if ((info->mem_map_data = (struct mem_map_data *) + malloc(sizeof(struct mem_map_data)*info->num_mem_map)) == NULL) { + ERRMSG("Can't allocate memory for the mem_map_data. 
%s\n", + strerror(errno)); + goto out; + } + for (section_nr = 0; section_nr < num_section; section_nr++) { + pfn_start = section_nr * PAGES_PER_SECTION(); + pfn_end = pfn_start + PAGES_PER_SECTION(); + if (info->max_mapnr < pfn_end) + pfn_end = info->max_mapnr; + dump_mem_map(pfn_start, pfn_end, mem_maps[section_nr], section_nr); + } + ret = TRUE; +out: + if (mem_maps != NULL) + free(mem_maps); + return ret; +} + +int +get_mem_map_without_mm(void) +{ + info->num_mem_map = 1; + if ((info->mem_map_data = (struct mem_map_data *) + malloc(sizeof(struct mem_map_data)*info->num_mem_map)) == NULL) { + ERRMSG("Can't allocate memory for the mem_map_data. %s\n", + strerror(errno)); + return FALSE; + } + if (is_xen_memory()) + dump_mem_map(0, info->dom0_mapnr, NOT_MEMMAP_ADDR, 0); + else + dump_mem_map(0, info->max_mapnr, NOT_MEMMAP_ADDR, 0); + + return TRUE; +} + +int +get_mem_map(void) +{ + mdf_pfn_t max_pfn = 0; + unsigned int i; + int ret; + + switch (get_mem_type()) { + case SPARSEMEM: + DEBUG_MSG("\n"); + DEBUG_MSG("Memory type : SPARSEMEM\n"); + DEBUG_MSG("\n"); + ret = get_mm_sparsemem(); + break; + case SPARSEMEM_EX: + DEBUG_MSG("\n"); + DEBUG_MSG("Memory type : SPARSEMEM_EX\n"); + DEBUG_MSG("\n"); + ret = get_mm_sparsemem(); + break; + case DISCONTIGMEM: + DEBUG_MSG("\n"); + DEBUG_MSG("Memory type : DISCONTIGMEM\n"); + DEBUG_MSG("\n"); + ret = get_mm_discontigmem(); + break; + case FLATMEM: + DEBUG_MSG("\n"); + DEBUG_MSG("Memory type : FLATMEM\n"); + DEBUG_MSG("\n"); + ret = get_mm_flatmem(); + break; + default: + ERRMSG("Can't distinguish the memory type.\n"); + ret = FALSE; + break; + } + /* + * Adjust "max_mapnr" for the case that Linux uses less memory + * than is dumped. For example when "mem=" has been used for the + * dumped system. 
+ */ + if (!is_xen_memory()) { + unsigned int valid_memmap = 0; + for (i = 0; i < info->num_mem_map; i++) { + if (info->mem_map_data[i].mem_map == NOT_MEMMAP_ADDR) + continue; + max_pfn = MAX(max_pfn, info->mem_map_data[i].pfn_end); + valid_memmap++; + } + if (valid_memmap) { + info->max_mapnr = MIN(info->max_mapnr, max_pfn); + } + } + return ret; +} + +int +initialize_bitmap_memory(void) +{ + struct disk_dump_header *dh; + struct kdump_sub_header *kh; + struct dump_bitmap *bmp; + off_t bitmap_offset; + off_t bitmap_len, max_sect_len; + mdf_pfn_t pfn; + int i, j; + long block_size; + + dh = info->dh_memory; + kh = info->kh_memory; + block_size = dh->block_size; + + bitmap_offset + = (DISKDUMP_HEADER_BLOCKS + dh->sub_hdr_size) * block_size; + bitmap_len = block_size * dh->bitmap_blocks; + + bmp = malloc(sizeof(struct dump_bitmap)); + if (bmp == NULL) { + ERRMSG("Can't allocate memory for the memory-bitmap. %s\n", + strerror(errno)); + return FALSE; + } + bmp->buf = malloc(BUFSIZE_BITMAP); + if (bmp->buf == NULL) { + ERRMSG("Can't allocate memory for the bitmap buffer. %s\n", + strerror(errno)); + return FALSE; + } + bmp->fd = info->fd_memory; + bmp->file_name = info->name_memory; + bmp->no_block = -1; + memset(bmp->buf, 0, BUFSIZE_BITMAP); + bmp->offset = bitmap_offset + bitmap_len / 2; + info->bitmap_memory = bmp; + + if (dh->header_version >= 6) + max_sect_len = divideup(kh->max_mapnr_64, BITMAP_SECT_LEN); + else + max_sect_len = divideup(dh->max_mapnr, BITMAP_SECT_LEN); + info->valid_pages = calloc(sizeof(ulong), max_sect_len); + if (info->valid_pages == NULL) { + ERRMSG("Can't allocate memory for the valid_pages. 
%s\n", + strerror(errno)); + free(bmp->buf); + free(bmp); + return FALSE; + } + for (i = 1, pfn = 0; i < max_sect_len; i++) { + info->valid_pages[i] = info->valid_pages[i - 1]; + for (j = 0; j < BITMAP_SECT_LEN; j++, pfn++) + if (is_dumpable(info->bitmap_memory, pfn, NULL)) + info->valid_pages[i]++; + } + + return TRUE; +} + +void +initialize_bitmap_memory_parallel(struct dump_bitmap *bitmap, int thread_num) +{ + bitmap->fd = FD_BITMAP_MEMORY_PARALLEL(thread_num); + bitmap->file_name = info->name_memory; + bitmap->no_block = -1; + memset(bitmap->buf, 0, BUFSIZE_BITMAP); + bitmap->offset = info->bitmap_memory->offset; +} + +int +calibrate_machdep_info(void) +{ + if (NUMBER(MAX_PHYSMEM_BITS) > 0) + info->max_physmem_bits = NUMBER(MAX_PHYSMEM_BITS); + + if (NUMBER(SECTION_SIZE_BITS) > 0) + info->section_size_bits = NUMBER(SECTION_SIZE_BITS); + + return TRUE; +} + +int +initial_for_parallel() +{ + unsigned long len_buf_out; + unsigned long page_data_buf_size; + struct page_flag *current; + int i, j; + + len_buf_out = calculate_len_buf_out(info->page_size); + + /* + * allocate memory for threads + */ + if ((info->threads = malloc(sizeof(pthread_t *) * info->num_threads)) + == NULL) { + MSG("Can't allocate memory for threads. %s\n", + strerror(errno)); + return FALSE; + } + memset(info->threads, 0, sizeof(pthread_t *) * info->num_threads); + + if ((info->kdump_thread_args = + malloc(sizeof(struct thread_args) * info->num_threads)) + == NULL) { + MSG("Can't allocate memory for arguments of threads. %s\n", + strerror(errno)); + return FALSE; + } + memset(info->kdump_thread_args, 0, sizeof(struct thread_args) * info->num_threads); + + for (i = 0; i < info->num_threads; i++) { + if ((info->threads[i] = malloc(sizeof(pthread_t))) == NULL) { + MSG("Can't allocate memory for thread %d. %s", + i, strerror(errno)); + return FALSE; + } + + if ((BUF_PARALLEL(i) = malloc(info->page_size)) == NULL) { + MSG("Can't allocate memory for the memory buffer. 
%s\n", + strerror(errno)); + return FALSE; + } + + if ((BUF_OUT_PARALLEL(i) = malloc(len_buf_out)) == NULL) { + MSG("Can't allocate memory for the compression buffer. %s\n", + strerror(errno)); + return FALSE; + } + + if ((MMAP_CACHE_PARALLEL(i) = malloc(sizeof(struct mmap_cache))) == NULL) { + MSG("Can't allocate memory for mmap_cache. %s\n", + strerror(errno)); + return FALSE; + } + + /* + * initial for mmap_cache + */ + MMAP_CACHE_PARALLEL(i)->mmap_buf = MAP_FAILED; + MMAP_CACHE_PARALLEL(i)->mmap_start_offset = 0; + MMAP_CACHE_PARALLEL(i)->mmap_end_offset = 0; + + if (initialize_zlib(&ZLIB_STREAM_PARALLEL(i), Z_BEST_SPEED) == FALSE) { + ERRMSG("zlib initialization failed.\n"); + return FALSE; + } + +#ifdef USELZO + if ((WRKMEM_PARALLEL(i) = malloc(LZO1X_1_MEM_COMPRESS)) == NULL) { + MSG("Can't allocate memory for the working memory. %s\n", + strerror(errno)); + return FALSE; + } +#endif + } + + info->num_buffers = PAGE_DATA_NUM * info->num_threads; + + /* + * allocate memory for page_data + */ + if ((info->page_data_buf = malloc(sizeof(struct page_data) * info->num_buffers)) + == NULL) { + MSG("Can't allocate memory for page_data_buf. %s\n", + strerror(errno)); + return FALSE; + } + memset(info->page_data_buf, 0, sizeof(struct page_data) * info->num_buffers); + + page_data_buf_size = MAX(len_buf_out, info->page_size); + for (i = 0; i < info->num_buffers; i++) { + if ((info->page_data_buf[i].buf = malloc(page_data_buf_size)) == NULL) { + MSG("Can't allocate memory for buf of page_data_buf. %s\n", + strerror(errno)); + return FALSE; + } + } + + /* + * initial page_flag for each thread + */ + if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) + == NULL) { + MSG("Can't allocate memory for page_flag_buf. 
%s\n", + strerror(errno)); + return FALSE; + } + memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); + + for (i = 0; i < info->num_threads; i++) { + if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { + MSG("Can't allocate memory for page_flag. %s\n", + strerror(errno)); + return FALSE; + } + current = info->page_flag_buf[i]; + + for (j = 1; j < PAGE_FLAG_NUM; j++) { + if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { + MSG("Can't allocate memory for page_flag. %s\n", + strerror(errno)); + return FALSE; + } + current = current->next; + } + current->next = info->page_flag_buf[i]; + } + + /* + * initial fd_memory for threads + */ + for (i = 0; i < info->num_threads; i++) { + if ((FD_MEMORY_PARALLEL(i) = open(info->name_memory, O_RDONLY)) + < 0) { + ERRMSG("Can't open the dump memory(%s). %s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + + if ((FD_BITMAP_MEMORY_PARALLEL(i) = + open(info->name_memory, O_RDONLY)) < 0) { + ERRMSG("Can't open the dump memory(%s). 
%s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + } + + return TRUE; +} + +void +free_for_parallel() +{ + int i, j; + struct page_flag *current; + + if (info->threads != NULL) { + for (i = 0; i < info->num_threads; i++) { + if (info->threads[i] != NULL) + free(info->threads[i]); + + if (BUF_PARALLEL(i) != NULL) + free(BUF_PARALLEL(i)); + + if (BUF_OUT_PARALLEL(i) != NULL) + free(BUF_OUT_PARALLEL(i)); + + if (MMAP_CACHE_PARALLEL(i) != NULL) { + if (MMAP_CACHE_PARALLEL(i)->mmap_buf != + MAP_FAILED) + munmap(MMAP_CACHE_PARALLEL(i)->mmap_buf, + MMAP_CACHE_PARALLEL(i)->mmap_end_offset + - MMAP_CACHE_PARALLEL(i)->mmap_start_offset); + + free(MMAP_CACHE_PARALLEL(i)); + } + finalize_zlib(&ZLIB_STREAM_PARALLEL(i)); +#ifdef USELZO + if (WRKMEM_PARALLEL(i) != NULL) + free(WRKMEM_PARALLEL(i)); +#endif + + } + free(info->threads); + } + + if (info->kdump_thread_args != NULL) + free(info->kdump_thread_args); + + if (info->page_data_buf != NULL) { + for (i = 0; i < info->num_buffers; i++) { + if (info->page_data_buf[i].buf != NULL) + free(info->page_data_buf[i].buf); + } + free(info->page_data_buf); + } + + if (info->page_flag_buf != NULL) { + for (i = 0; i < info->num_threads; i++) { + for (j = 0; j < PAGE_FLAG_NUM; j++) { + if (info->page_flag_buf[i] != NULL) { + current = info->page_flag_buf[i]; + info->page_flag_buf[i] = current->next; + free(current); + } + } + } + free(info->page_flag_buf); + } + + if (info->parallel_info == NULL) + return; + + for (i = 0; i < info->num_threads; i++) { + if (FD_MEMORY_PARALLEL(i) >= 0) + close(FD_MEMORY_PARALLEL(i)); + + if (FD_BITMAP_MEMORY_PARALLEL(i) >= 0) + close(FD_BITMAP_MEMORY_PARALLEL(i)); + } +} + +int +find_kaslr_offsets() +{ + off_t offset; + unsigned long size; + int ret = FALSE; + + get_vmcoreinfo(&offset, &size); + + if (!(info->name_vmcoreinfo = strdup(FILENAME_VMCOREINFO))) { + MSG("Can't duplicate strings(%s).\n", FILENAME_VMCOREINFO); + return FALSE; + } + if (!copy_vmcoreinfo(offset, size)) + goto out; + 
+ if (!open_vmcoreinfo("r")) + goto out; + + unlink(info->name_vmcoreinfo); + + /* + * This arch specific function should update info->kaslr_offset. If + * kaslr is not enabled then offset will be set to 0. arch specific + * function might need to read from vmcoreinfo, therefore we have + * called this function between open_vmcoreinfo() and + * close_vmcoreinfo() + */ + get_kaslr_offset(SYMBOL(_stext)); + + close_vmcoreinfo(); + + ret = TRUE; +out: + free(info->name_vmcoreinfo); + info->name_vmcoreinfo = NULL; + + return ret; +} + +int +initial(void) +{ + off_t offset; + unsigned long size; + int debug_info = FALSE; + + if (is_xen_memory() && !initial_xen()) + return FALSE; + +#ifdef USELZO + if (lzo_init() == LZO_E_OK) + info->flag_lzo_support = TRUE; +#else + if (info->flag_compress == DUMP_DH_COMPRESSED_LZO) { + MSG("'-l' option is disabled, "); + MSG("because this binary doesn't support lzo compression.\n"); + MSG("Try `make USELZO=on` when building.\n"); + } +#endif + +#ifndef USESNAPPY + if (info->flag_compress == DUMP_DH_COMPRESSED_SNAPPY) { + MSG("'-p' option is disabled, "); + MSG("because this binary doesn't support snappy " + "compression.\n"); + MSG("Try `make USESNAPPY=on` when building.\n"); + } +#endif + + if (info->flag_exclude_xen_dom && !is_xen_memory()) { + MSG("'-X' option is disable,"); + MSG("because %s is not Xen's memory core image.\n", info->name_memory); + MSG("Commandline parameter is invalid.\n"); + MSG("Try `makedumpfile --help' for more information.\n"); + return FALSE; + } + /* + * Get the debug information for analysis from the vmcoreinfo file + */ + if (info->flag_read_vmcoreinfo) { + char *name_vmcoreinfo = info->name_vmcoreinfo; + FILE *file_vmcoreinfo = info->file_vmcoreinfo; + + if (has_vmcoreinfo() && !find_kaslr_offsets()) + return FALSE; + + info->name_vmcoreinfo = name_vmcoreinfo; + info->file_vmcoreinfo = file_vmcoreinfo; + + info->read_text_vmcoreinfo = 1; + if (!read_vmcoreinfo()) + return FALSE; + 
info->read_text_vmcoreinfo = 0; + + close_vmcoreinfo(); + debug_info = TRUE; + /* + * Get the debug information for analysis from the kernel file + */ + } else if (info->name_vmlinux) { + set_dwarf_debuginfo("vmlinux", NULL, + info->name_vmlinux, info->fd_vmlinux); + + if (has_vmcoreinfo() && !find_kaslr_offsets()) + return FALSE; + + if (!get_symbol_info()) + return FALSE; + + if (!get_structure_info()) + return FALSE; + + if (!get_srcfile_info()) + return FALSE; + + debug_info = TRUE; + } else { + /* + * Check whether /proc/vmcore contains vmcoreinfo, + * and get both the offset and the size. + */ + if (!has_vmcoreinfo()) { + if (info->max_dump_level <= DL_EXCLUDE_ZERO) + goto out; + + MSG("%s doesn't contain vmcoreinfo.\n", + info->name_memory); + MSG("Specify '-x' option or '-i' option.\n"); + MSG("Commandline parameter is invalid.\n"); + MSG("Try `makedumpfile --help' for more information.\n"); + return FALSE; + } + } + + /* + * Get the debug information from /proc/vmcore. + * NOTE: Don't move this code to the above, because the debugging + * information token by -x/-i option is overwritten by vmcoreinfo + * in /proc/vmcore. vmcoreinfo in /proc/vmcore is more reliable + * than -x/-i option. 
+ */ + if (has_vmcoreinfo()) { + get_vmcoreinfo(&offset, &size); + if (!read_vmcoreinfo_from_vmcore(offset, size, FALSE)) + return FALSE; + debug_info = TRUE; + } + + if (!get_value_for_old_linux()) + return FALSE; + + if (info->flag_refiltering) { + if (info->flag_elf_dumpfile) { + MSG("'-E' option is disable, "); + MSG("because %s is kdump compressed format.\n", + info->name_memory); + return FALSE; + } + + info->phys_base = info->kh_memory->phys_base; + info->max_dump_level |= info->kh_memory->dump_level; + + if (!initialize_bitmap_memory()) + return FALSE; + + } else if (info->flag_sadump) { + if (info->flag_elf_dumpfile) { + MSG("'-E' option is disable, "); + MSG("because %s is sadump %s format.\n", + info->name_memory, sadump_format_type_name()); + return FALSE; + } + + if (!set_page_size(sadump_page_size())) + return FALSE; + + if (!sadump_initialize_bitmap_memory()) + return FALSE; + + (void) sadump_set_timestamp(&info->timestamp); + + /* + * NOTE: phys_base is never saved by sadump and so + * must be computed in some way. We here choose the + * way of looking at linux_banner. See + * sadump_virt_phys_base(). The processing is + * postponed until debug information becomes + * available. + */ + } + +out: + if (!info->page_size) { + /* + * If we cannot get page_size from a vmcoreinfo file, + * fall back to the current kernel page size. + */ + if (!fallback_to_current_page_size()) + return FALSE; + } + + if (!is_xen_memory() && !cache_init()) + return FALSE; + + if (info->flag_mem_usage && !get_kcore_dump_loads()) + return FALSE; + + if (!info->flag_refiltering && !info->flag_sadump) { + if (!get_phys_base()) + return FALSE; + } + + if (!get_max_mapnr()) + return FALSE; + + if (info->working_dir || info->flag_reassemble || info->flag_refiltering + || info->flag_sadump || info->flag_mem_usage) { + /* Can be done in 1-cycle by using backing file. 
*/ + info->flag_cyclic = FALSE; + info->pfn_cyclic = info->max_mapnr; + } else { + if (info->bufsize_cyclic == 0) { + if (!calculate_cyclic_buffer_size()) + return FALSE; + + if (info->bufsize_cyclic * BITPERBYTE >= info->max_mapnr * 2) { + DEBUG_MSG("There is enough free memory to be done"); + DEBUG_MSG(" in one cycle.\n"); + info->flag_cyclic = FALSE; + } + } else { + unsigned long long free_memory; + + /* + * The buffer size is specified as Kbyte with + * --cyclic-buffer <size> option. + */ + info->bufsize_cyclic <<= 10; + + /* + * Truncate the buffer size to free memory size. + */ + free_memory = get_free_memory_size(); + if (info->num_dumpfile > 1) + free_memory /= info->num_dumpfile; + if (info->bufsize_cyclic > free_memory) { + MSG("Specified buffer size is larger than free memory.\n"); + MSG("The buffer size for the cyclic mode will "); + MSG("be truncated to %lld byte.\n", free_memory); + info->bufsize_cyclic = free_memory; + } + } + + info->pfn_cyclic = info->bufsize_cyclic * BITPERBYTE; + + DEBUG_MSG("\n"); + DEBUG_MSG("Buffer size for the cyclic mode: %ld\n", info->bufsize_cyclic); + } + + if (info->num_threads) { + if (is_xen_memory()) { + MSG("'--num-threads' option is disable,\n"); + MSG("because %s is Xen's memory core image.\n", + info->name_memory); + return FALSE; + } + + if (info->flag_sadump) { + MSG("'--num-threads' option is disable,\n"); + MSG("because %s is sadump %s format.\n", + info->name_memory, sadump_format_type_name()); + return FALSE; + } + + if (!initial_for_parallel()) { + MSG("Fail to initial for parallel process.\n"); + return FALSE; + } + } + + if (debug_info && !get_machdep_info()) + return FALSE; + + if (debug_info && !calibrate_machdep_info()) + return FALSE; + + if (is_xen_memory() && !get_dom0_mapnr()) + return FALSE; + + if (debug_info) { + if (info->flag_sadump) + (void) sadump_virt_phys_base(); + + if (info->flag_sadump) { + int online_cpus; + + online_cpus = sadump_num_online_cpus(); + if (!online_cpus) + return FALSE; 
+ + set_nr_cpus(online_cpus); + } + + if (!check_release()) + return FALSE; + + if (!get_versiondep_info()) + return FALSE; + + /* + * NOTE: This must be done before refering to + * VMALLOC'ed memory. The first 640kB contains data + * necessary for paging, like PTE. The absence of the + * region affects reading VMALLOC'ed memory such as + * module data. + */ + if (info->flag_sadump) + sadump_kdump_backup_region_init(); + + if (!get_numnodes()) + return FALSE; + + if (!get_mem_map()) + return FALSE; + + if (!info->flag_dmesg && info->flag_sadump && + sadump_check_debug_info() && + !sadump_generate_elf_note_from_dumpfile()) + return FALSE; + + } else { + if (!get_mem_map_without_mm()) + return FALSE; + } + + /* use buddy identification of free pages whether cyclic or not */ + /* (this can reduce pages scan of 1TB memory from 60sec to 30sec) */ + if (info->dump_level & DL_EXCLUDE_FREE) + setup_page_is_buddy(); + + if (info->flag_usemmap == MMAP_TRY ) { + if (initialize_mmap()) { + DEBUG_MSG("mmap() is available on the kernel.\n"); + info->flag_usemmap = MMAP_ENABLE; + } else { + DEBUG_MSG("The kernel doesn't support mmap(),"); + DEBUG_MSG("read() will be used instead.\n"); + info->flag_usemmap = MMAP_DISABLE; + } + } else if (info->flag_usemmap == MMAP_DISABLE) + DEBUG_MSG("mmap() is disabled by specified option '--non-mmap'.\n"); + + return TRUE; +} + +void +initialize_bitmap(struct dump_bitmap *bitmap) +{ + if (info->fd_bitmap >= 0) { + bitmap->fd = info->fd_bitmap; + bitmap->file_name = info->name_bitmap; + bitmap->no_block = -1; + memset(bitmap->buf, 0, BUFSIZE_BITMAP); + } else { + bitmap->fd = -1; + bitmap->file_name = NULL; + bitmap->no_block = -1; + memset(bitmap->buf, 0, info->bufsize_cyclic); + } +} + +void +initialize_1st_bitmap(struct dump_bitmap *bitmap) +{ + initialize_bitmap(bitmap); + bitmap->offset = 0; +} + +void +initialize_2nd_bitmap(struct dump_bitmap *bitmap) +{ + initialize_bitmap(bitmap); + bitmap->offset = info->len_bitmap / 2; +} + +void 
+initialize_2nd_bitmap_parallel(struct dump_bitmap *bitmap, int thread_num) +{ + bitmap->fd = FD_BITMAP_PARALLEL(thread_num); + bitmap->file_name = info->name_bitmap; + bitmap->no_block = -1; + memset(bitmap->buf, 0, BUFSIZE_BITMAP); + bitmap->offset = info->len_bitmap / 2; +} + +int +set_bitmap_file(struct dump_bitmap *bitmap, mdf_pfn_t pfn, int val) +{ + int byte, bit; + off_t old_offset, new_offset; + old_offset = bitmap->offset + BUFSIZE_BITMAP * bitmap->no_block; + new_offset = bitmap->offset + BUFSIZE_BITMAP * (pfn / PFN_BUFBITMAP); + + if (0 <= bitmap->no_block && old_offset != new_offset) { + if (lseek(bitmap->fd, old_offset, SEEK_SET) < 0 ) { + ERRMSG("Can't seek the bitmap(%s). %s\n", + bitmap->file_name, strerror(errno)); + return FALSE; + } + if (write(bitmap->fd, bitmap->buf, BUFSIZE_BITMAP) + != BUFSIZE_BITMAP) { + ERRMSG("Can't write the bitmap(%s). %s\n", + bitmap->file_name, strerror(errno)); + return FALSE; + } + } + if (old_offset != new_offset) { + if (lseek(bitmap->fd, new_offset, SEEK_SET) < 0 ) { + ERRMSG("Can't seek the bitmap(%s). %s\n", + bitmap->file_name, strerror(errno)); + return FALSE; + } + if (read(bitmap->fd, bitmap->buf, BUFSIZE_BITMAP) + != BUFSIZE_BITMAP) { + ERRMSG("Can't read the bitmap(%s). %s\n", + bitmap->file_name, strerror(errno)); + return FALSE; + } + bitmap->no_block = pfn / PFN_BUFBITMAP; + } + /* + * If val is 0, clear bit on the bitmap. + */ + byte = (pfn%PFN_BUFBITMAP)>>3; + bit = (pfn%PFN_BUFBITMAP) & 7; + if (val) + bitmap->buf[byte] |= 1<<bit; + else + bitmap->buf[byte] &= ~(1<<bit); + + return TRUE; +} + +int +set_bitmap_buffer(struct dump_bitmap *bitmap, mdf_pfn_t pfn, int val, struct cycle *cycle) +{ + int byte, bit; + static int warning = 0; + + if (!is_cyclic_region(pfn, cycle)) { + if (warning == 0) { + MSG("WARNING: PFN out of cycle range. 
(pfn:%llx, ", pfn); + MSG("cycle:[%llx-%llx])\n", cycle->start_pfn, cycle->end_pfn); + warning = 1; + } + return FALSE; + } + + /* + * If val is 0, clear bit on the bitmap. + */ + byte = (pfn - cycle->start_pfn)>>3; + bit = (pfn - cycle->start_pfn) & 7; + if (val) + bitmap->buf[byte] |= 1<<bit; + else + bitmap->buf[byte] &= ~(1<<bit); + + return TRUE; +} + +int +set_bitmap(struct dump_bitmap *bitmap, mdf_pfn_t pfn, int val, struct cycle *cycle) +{ + if (bitmap->fd >= 0) { + return set_bitmap_file(bitmap, pfn, val); + } else { + return set_bitmap_buffer(bitmap, pfn, val, cycle); + } +} + +int +sync_bitmap(struct dump_bitmap *bitmap) +{ + off_t offset; + offset = bitmap->offset + BUFSIZE_BITMAP * bitmap->no_block; + + /* + * The bitmap doesn't have the fd, it's a on-memory bitmap. + */ + if (bitmap->fd < 0) + return TRUE; + /* + * The bitmap buffer is not dirty, and it is not necessary + * to write out it. + */ + if (bitmap->no_block < 0) + return TRUE; + + if (lseek(bitmap->fd, offset, SEEK_SET) < 0 ) { + ERRMSG("Can't seek the bitmap(%s). %s\n", + bitmap->file_name, strerror(errno)); + return FALSE; + } + if (write(bitmap->fd, bitmap->buf, BUFSIZE_BITMAP) + != BUFSIZE_BITMAP) { + ERRMSG("Can't write the bitmap(%s). 
%s\n", + bitmap->file_name, strerror(errno)); + return FALSE; + } + return TRUE; +} + +int +sync_1st_bitmap(void) +{ + return sync_bitmap(info->bitmap1); +} + +int +sync_2nd_bitmap(void) +{ + return sync_bitmap(info->bitmap2); +} + +int +set_bit_on_1st_bitmap(mdf_pfn_t pfn, struct cycle *cycle) +{ + return set_bitmap(info->bitmap1, pfn, 1, cycle); +} + +int +clear_bit_on_1st_bitmap(mdf_pfn_t pfn, struct cycle *cycle) +{ + return set_bitmap(info->bitmap1, pfn, 0, cycle); + +} + +int +clear_bit_on_2nd_bitmap(mdf_pfn_t pfn, struct cycle *cycle) +{ + return set_bitmap(info->bitmap2, pfn, 0, cycle); +} + +int +clear_bit_on_2nd_bitmap_for_kernel(mdf_pfn_t pfn, struct cycle *cycle) +{ + unsigned long long maddr; + + if (is_xen_memory()) { + maddr = ptom_xen(pfn_to_paddr(pfn)); + if (maddr == NOT_PADDR) { + ERRMSG("Can't convert a physical address(%llx) to machine address.\n", + pfn_to_paddr(pfn)); + return FALSE; + } + pfn = paddr_to_pfn(maddr); + } + return clear_bit_on_2nd_bitmap(pfn, cycle); +} + +int +set_bit_on_2nd_bitmap(mdf_pfn_t pfn, struct cycle *cycle) +{ + return set_bitmap(info->bitmap2, pfn, 1, cycle); +} + +int +set_bit_on_2nd_bitmap_for_kernel(mdf_pfn_t pfn, struct cycle *cycle) +{ + unsigned long long maddr; + + if (is_xen_memory()) { + maddr = ptom_xen(pfn_to_paddr(pfn)); + if (maddr == NOT_PADDR) { + ERRMSG("Can't convert a physical address(%llx) to machine address.\n", + pfn_to_paddr(pfn)); + return FALSE; + } + pfn = paddr_to_pfn(maddr); + } + return set_bit_on_2nd_bitmap(pfn, cycle); +} + +int +read_cache(struct cache_data *cd) +{ + const off_t failed = (off_t)-1; + + if (lseek(cd->fd, cd->offset, SEEK_SET) == failed) { + ERRMSG("Can't seek the dump file(%s). %s\n", + cd->file_name, strerror(errno)); + return FALSE; + } + if (read(cd->fd, cd->buf, cd->cache_size) != cd->cache_size) { + ERRMSG("Can't read the dump file(%s). 
%s\n", + cd->file_name, strerror(errno)); + return FALSE; + } + cd->offset += cd->cache_size; + return TRUE; +} + +int +is_bigendian(void) +{ + int i = 0x12345678; + + if (*(char *)&i == 0x12) + return TRUE; + else + return FALSE; +} + +int +write_and_check_space(int fd, void *buf, size_t buf_size, char *file_name) +{ + int status, written_size = 0; + + while (written_size < buf_size) { + status = write(fd, buf + written_size, + buf_size - written_size); + if (0 < status) { + written_size += status; + continue; + } + if (errno == ENOSPC) + info->flag_nospace = TRUE; + MSG("\nCan't write the dump file(%s). %s\n", + file_name, strerror(errno)); + return FALSE; + } + return TRUE; +} + +int +write_buffer(int fd, off_t offset, void *buf, size_t buf_size, char *file_name) +{ + struct makedumpfile_data_header fdh; + const off_t failed = (off_t)-1; + + if (fd == STDOUT_FILENO) { + /* + * Output a header of flattened format instead of + * lseek(). For sending dump data to a different + * architecture, change the values to big endian. + */ + if (is_bigendian()){ + fdh.offset = offset; + fdh.buf_size = buf_size; + } else { + fdh.offset = bswap_64(offset); + fdh.buf_size = bswap_64(buf_size); + } + if (!write_and_check_space(fd, &fdh, sizeof(fdh), file_name)) + return FALSE; + } else { + if (lseek(fd, offset, SEEK_SET) == failed) { + ERRMSG("Can't seek the dump file(%s). 
%s\n", + file_name, strerror(errno)); + return FALSE; + } + } + if (!write_and_check_space(fd, buf, buf_size, file_name)) + return FALSE; + + return TRUE; +} + +int +write_cache(struct cache_data *cd, void *buf, size_t size) +{ + memcpy(cd->buf + cd->buf_size, buf, size); + cd->buf_size += size; + + if (cd->buf_size < cd->cache_size) + return TRUE; + + if (!write_buffer(cd->fd, cd->offset, cd->buf, cd->cache_size, + cd->file_name)) + return FALSE; + + cd->buf_size -= cd->cache_size; + memcpy(cd->buf, cd->buf + cd->cache_size, cd->buf_size); + cd->offset += cd->cache_size; + return TRUE; +} + +int +write_cache_bufsz(struct cache_data *cd) +{ + if (!cd->buf_size) + return TRUE; + + if (!write_buffer(cd->fd, cd->offset, cd->buf, cd->buf_size, + cd->file_name)) + return FALSE; + + cd->offset += cd->buf_size; + cd->buf_size = 0; + return TRUE; +} + +int +write_cache_zero(struct cache_data *cd, size_t size) +{ + if (!write_cache_bufsz(cd)) + return FALSE; + + memset(cd->buf + cd->buf_size, 0, size); + cd->buf_size += size; + + return write_cache_bufsz(cd); +} + +int +read_buf_from_stdin(void *buf, int buf_size) +{ + int read_size = 0, tmp_read_size = 0; + time_t last_time, tm; + + last_time = time(NULL); + + while (read_size != buf_size) { + + tmp_read_size = read(STDIN_FILENO, buf + read_size, + buf_size - read_size); + + if (tmp_read_size < 0) { + ERRMSG("Can't read STDIN. %s\n", strerror(errno)); + return FALSE; + + } else if (0 == tmp_read_size) { + /* + * If it cannot get any data from a standard input + * for a long time, break this loop. + */ + tm = time(NULL); + if (TIMEOUT_STDIN < (tm - last_time)) { + ERRMSG("Can't get any data from STDIN.\n"); + return FALSE; + } + } else { + read_size += tmp_read_size; + last_time = time(NULL); + } + } + return TRUE; +} + +int +read_start_flat_header(void) +{ + char buf[MAX_SIZE_MDF_HEADER]; + struct makedumpfile_header fh; + + /* + * Get flat header. 
+ */ + if (!read_buf_from_stdin(buf, MAX_SIZE_MDF_HEADER)) { + ERRMSG("Can't get header of flattened format.\n"); + return FALSE; + } + memcpy(&fh, buf, sizeof(fh)); + + if (!is_bigendian()){ + fh.type = bswap_64(fh.type); + fh.version = bswap_64(fh.version); + } + + /* + * Check flat header. + */ + if (strcmp(fh.signature, MAKEDUMPFILE_SIGNATURE)) { + ERRMSG("Can't get signature of flattened format.\n"); + return FALSE; + } + if (fh.type != TYPE_FLAT_HEADER) { + ERRMSG("Can't get type of flattened format.\n"); + return FALSE; + } + + return TRUE; +} + +static void +exclude_nodata_pages(struct cycle *cycle) +{ + int i; + unsigned long long phys_start, phys_end; + off_t file_size; + + i = 0; + while (get_pt_load_extents(i, &phys_start, &phys_end, + NULL, &file_size)) { + unsigned long long pfn, pfn_end; + + pfn = paddr_to_pfn(phys_start + file_size); + pfn_end = paddr_to_pfn(roundup(phys_end, PAGESIZE())); + + if (pfn < cycle->start_pfn) + pfn = cycle->start_pfn; + if (pfn_end >= cycle->end_pfn) + pfn_end = cycle->end_pfn - 1; + while (pfn < pfn_end) { + clear_bit_on_2nd_bitmap(pfn, cycle); + ++pfn; + } + ++i; + } +} + +int +read_flat_data_header(struct makedumpfile_data_header *fdh) +{ + if (!read_buf_from_stdin(fdh, + sizeof(struct makedumpfile_data_header))) { + ERRMSG("Can't get header of flattened format.\n"); + return FALSE; + } + if (!is_bigendian()){ + fdh->offset = bswap_64(fdh->offset); + fdh->buf_size = bswap_64(fdh->buf_size); + } + return TRUE; +} + +int +reserve_diskspace(int fd, off_t start_offset, off_t end_offset, char *file_name) +{ + off_t off; + size_t buf_size, write_size; + char *buf = NULL; + + int ret = FALSE; + + assert(start_offset < end_offset); + buf_size = end_offset - start_offset; + + buf_size = MIN(info->page_size, buf_size); + if ((buf = malloc(buf_size)) == NULL) { + ERRMSG("Can't allocate memory for the size of reserved diskspace. 
%s\n", + strerror(errno)); + return FALSE; + } + + memset(buf, 0, buf_size); + off = start_offset; + + while (off < end_offset) { + write_size = MIN(buf_size, end_offset - off); + if (!write_buffer(fd, off, buf, write_size, file_name)) + goto out; + + off += write_size; + } + + ret = TRUE; +out: + if (buf != NULL) { + free(buf); + } + + return ret; +} + +#define DUMP_ELF_INCOMPLETE 0x1 +int +check_and_modify_elf_headers(char *filename) +{ + int fd, ret = FALSE; + Elf64_Ehdr ehdr64; + Elf32_Ehdr ehdr32; + + if ((fd = open(filename, O_RDWR)) < 0) { + ERRMSG("Can't open the dump file(%s). %s\n", + filename, strerror(errno)); + return FALSE; + } + + /* + * the is_elf64_memory() function still can be used. + */ + /* + * Set the incomplete flag to the e_flags of elf header. + */ + if (is_elf64_memory()) { /* ELF64 */ + if (!get_elf64_ehdr(fd, filename, &ehdr64)) { + ERRMSG("Can't get ehdr64.\n"); + goto out_close_file; + } + ehdr64.e_flags |= DUMP_ELF_INCOMPLETE; + if (!write_buffer(fd, 0, &ehdr64, sizeof(Elf64_Ehdr), filename)) + goto out_close_file; + + } else { /* ELF32 */ + if (!get_elf32_ehdr(fd, filename, &ehdr32)) { + ERRMSG("Can't get ehdr32.\n"); + goto out_close_file; + } + ehdr32.e_flags |= DUMP_ELF_INCOMPLETE; + if (!write_buffer(fd, 0, &ehdr32, sizeof(Elf32_Ehdr), filename)) + goto out_close_file; + + } + ret = TRUE; +out_close_file: + if (close(fd) < 0) { + ERRMSG("Can't close the dump file(%s). %s\n", + filename, strerror(errno)); + } + return ret; +} + +int +check_and_modify_kdump_headers(char *filename) { + int fd, ret = FALSE; + struct disk_dump_header dh; + + if (!read_disk_dump_header(&dh, filename)) + return FALSE; + + if ((fd = open(filename, O_RDWR)) < 0) { + ERRMSG("Can't open the dump file(%s). %s\n", + filename, strerror(errno)); + return FALSE; + } + + /* + * Set the incomplete flag to the status of disk_dump_header. + */ + dh.status |= DUMP_DH_COMPRESSED_INCOMPLETE; + + /* + * It's safe to overwrite the disk_dump_header. 
+ */ + if (!write_buffer(fd, 0, &dh, sizeof(struct disk_dump_header), filename)) + goto out_close_file; + + ret = TRUE; +out_close_file: + if (close(fd) < 0) { + ERRMSG("Can't close the dump file(%s). %s\n", + filename, strerror(errno)); + } + + return ret; +} + +int +check_and_modify_multiple_kdump_headers() { + int i, status, ret = TRUE; + pid_t pid; + pid_t array_pid[info->num_dumpfile]; + + for (i = 0; i < info->num_dumpfile; i++) { + if ((pid = fork()) < 0) { + return FALSE; + + } else if (pid == 0) { /* Child */ + if (!check_and_modify_kdump_headers(SPLITTING_DUMPFILE(i))) + exit(1); + exit(0); + } + array_pid[i] = pid; + } + + for (i = 0; i < info->num_dumpfile; i++) { + waitpid(array_pid[i], &status, WUNTRACED); + if (!WIFEXITED(status) || WEXITSTATUS(status) == 1) { + ERRMSG("Check and modify the incomplete dumpfile(%s) failed.\n", + SPLITTING_DUMPFILE(i)); + ret = FALSE; + } + } + + return ret; +} + +int +check_and_modify_headers() +{ + if (info->flag_elf_dumpfile) + return check_and_modify_elf_headers(info->name_dumpfile); + else + if(info->flag_split) + return check_and_modify_multiple_kdump_headers(); + else + return check_and_modify_kdump_headers(info->name_dumpfile); + return FALSE; +} + + +int +rearrange_dumpdata(void) +{ + int read_size, tmp_read_size; + char buf[SIZE_BUF_STDIN]; + struct makedumpfile_data_header fdh; + + /* + * Get flat header. + */ + if (!read_start_flat_header()) { + ERRMSG("Can't get header of flattened format.\n"); + return FALSE; + } + + /* + * Read the first data header. 
+ */ + if (!read_flat_data_header(&fdh)) { + ERRMSG("Can't get header of flattened format.\n"); + return FALSE; + } + + do { + read_size = 0; + while (read_size < fdh.buf_size) { + if (sizeof(buf) < (fdh.buf_size - read_size)) + tmp_read_size = sizeof(buf); + else + tmp_read_size = fdh.buf_size - read_size; + + if (!read_buf_from_stdin(buf, tmp_read_size)) { + ERRMSG("Can't get data of flattened format.\n"); + return FALSE; + } + if (!write_buffer(info->fd_dumpfile, + fdh.offset + read_size, buf, tmp_read_size, + info->name_dumpfile)) + return FALSE; + + read_size += tmp_read_size; + } + /* + * Read the next header. + */ + if (!read_flat_data_header(&fdh)) { + ERRMSG("Can't get data header of flattened format.\n"); + return FALSE; + } + + } while ((0 <= fdh.offset) && (0 < fdh.buf_size)); + + if ((fdh.offset != END_FLAG_FLAT_HEADER) + || (fdh.buf_size != END_FLAG_FLAT_HEADER)) { + ERRMSG("Can't get valid end header of flattened format.\n"); + return FALSE; + } + + return TRUE; +} + +mdf_pfn_t +page_to_pfn(unsigned long page) +{ + unsigned int num; + mdf_pfn_t pfn = ULONGLONG_MAX; + unsigned long long index = 0; + struct mem_map_data *mmd; + + mmd = info->mem_map_data; + for (num = 0; num < info->num_mem_map; num++, mmd++) { + if (mmd->mem_map == NOT_MEMMAP_ADDR) + continue; + if (page < mmd->mem_map) + continue; + index = (page - mmd->mem_map) / SIZE(page); + if (index >= mmd->pfn_end - mmd->pfn_start) + continue; + pfn = mmd->pfn_start + index; + break; + } + if (pfn == ULONGLONG_MAX) { + ERRMSG("Can't convert the address of page descriptor (%lx) to pfn.\n", page); + return ULONGLONG_MAX; + } + return pfn; +} + +int +reset_bitmap_of_free_pages(unsigned long node_zones, struct cycle *cycle) +{ + + int order, i, migrate_type, migrate_types; + unsigned long curr, previous, head, curr_page, curr_prev; + unsigned long addr_free_pages, free_pages = 0, found_free_pages = 0; + mdf_pfn_t pfn, start_pfn; + + /* + * On linux-2.6.24 or later, free_list is divided into the 
array. + */ + migrate_types = ARRAY_LENGTH(free_area.free_list); + if (migrate_types == NOT_FOUND_STRUCTURE) + migrate_types = 1; + + for (order = (ARRAY_LENGTH(zone.free_area) - 1); order >= 0; --order) { + for (migrate_type = 0; migrate_type < migrate_types; + migrate_type++) { + head = node_zones + OFFSET(zone.free_area) + + SIZE(free_area) * order + + OFFSET(free_area.free_list) + + SIZE(list_head) * migrate_type; + previous = head; + if (!readmem(VADDR, head + OFFSET(list_head.next), + &curr, sizeof curr)) { + ERRMSG("Can't get next list_head.\n"); + return FALSE; + } + for (;curr != head;) { + curr_page = curr - OFFSET(page.lru); + start_pfn = page_to_pfn(curr_page); + if (start_pfn == ULONGLONG_MAX) + return FALSE; + + if (!readmem(VADDR, curr+OFFSET(list_head.prev), + &curr_prev, sizeof curr_prev)) { + ERRMSG("Can't get prev list_head.\n"); + return FALSE; + } + if (previous != curr_prev) { + ERRMSG("The free list is broken.\n"); + return FALSE; + } + for (i = 0; i < (1<<order); i++) { + pfn = start_pfn + i; + if (clear_bit_on_2nd_bitmap_for_kernel(pfn, cycle)) + found_free_pages++; + } + + previous = curr; + if (!readmem(VADDR, curr+OFFSET(list_head.next), + &curr, sizeof curr)) { + ERRMSG("Can't get next list_head.\n"); + return FALSE; + } + } + } + } + + /* + * Check the number of free pages. + */ + if (OFFSET(zone.free_pages) != NOT_FOUND_STRUCTURE) { + addr_free_pages = node_zones + OFFSET(zone.free_pages); + + } else if (OFFSET(zone.vm_stat) != NOT_FOUND_STRUCTURE) { + /* + * On linux-2.6.21 or later, the number of free_pages is + * in vm_stat[NR_FREE_PAGES]. 
+ */ + addr_free_pages = node_zones + OFFSET(zone.vm_stat) + + sizeof(long) * NUMBER(NR_FREE_PAGES); + + } else { + ERRMSG("Can't get addr_free_pages.\n"); + return FALSE; + } + if (!readmem(VADDR, addr_free_pages, &free_pages, sizeof free_pages)) { + ERRMSG("Can't get free_pages.\n"); + return FALSE; + } + if (free_pages != found_free_pages && !info->flag_cyclic) { + /* + * On linux-2.6.21 or later, the number of free_pages is + * sometimes different from the one of the list "free_area", + * because the former is flushed asynchronously. + */ + DEBUG_MSG("The number of free_pages is invalid.\n"); + DEBUG_MSG(" free_pages = %ld\n", free_pages); + DEBUG_MSG(" found_free_pages = %ld\n", found_free_pages); + } + pfn_free += found_free_pages; + + return TRUE; +} + +static int +dump_log_entry(char *logptr, int fp) +{ + char *msg, *p, *bufp; + unsigned int i, text_len, indent_len, buf_need; + unsigned long long ts_nsec; + char buf[BUFSIZE]; + ulonglong nanos; + ulong rem; + + text_len = USHORT(logptr + OFFSET(printk_log.text_len)); + ts_nsec = ULONGLONG(logptr + OFFSET(printk_log.ts_nsec)); + + nanos = (ulonglong)ts_nsec / (ulonglong)1000000000; + rem = (ulonglong)ts_nsec % (ulonglong)1000000000; + + msg = logptr + SIZE(printk_log); + + bufp = buf; + bufp += sprintf(buf, "[%5lld.%06ld] ", nanos, rem/1000); + indent_len = strlen(buf); + + /* How much buffer space is needed in the worst case */ + buf_need = MAX(sizeof("\\xXX\n"), sizeof("\n") + indent_len); + + for (i = 0, p = msg; i < text_len; i++, p++) { + if (bufp - buf >= sizeof(buf) - buf_need) { + if (write(info->fd_dumpfile, buf, bufp - buf) < 0) + return FALSE; + bufp = buf; + } + + if (*p == '\n') + bufp += sprintf(bufp, "\n%-*s", indent_len, ""); + else if (isprint(*p) || isspace(*p)) + *bufp++ = *p; + else + bufp += sprintf(bufp, "\\x%02x", *p); + } + + *bufp++ = '\n'; + + if (write(info->fd_dumpfile, buf, bufp - buf) < 0) + return FALSE; + else + return TRUE; +} + +/* + * get log record by index; idx must point 
to valid message. + */ +static char * +log_from_idx(unsigned int idx, char *logbuf) +{ + char *logptr; + unsigned int msglen; + + logptr = logbuf + idx; + + /* + * A length == 0 record is the end of buffer marker. + * Wrap around and return the message at the start of + * the buffer. + */ + + msglen = USHORT(logptr + OFFSET(printk_log.len)); + if (!msglen) + logptr = logbuf; + + return logptr; +} + +static long +log_next(unsigned int idx, char *logbuf) +{ + char *logptr; + unsigned int msglen; + + logptr = logbuf + idx; + + /* + * A length == 0 record is the end of buffer marker. Wrap around and + * read the message at the start of the buffer as *this* one, and + * return the one after that. + */ + + msglen = USHORT(logptr + OFFSET(printk_log.len)); + if (!msglen) { + msglen = USHORT(logbuf + OFFSET(printk_log.len)); + return msglen; + } + + return idx + msglen; +} + +int +dump_dmesg() +{ + int log_buf_len, length_log, length_oldlog, ret = FALSE; + unsigned long index, log_buf, log_end; + unsigned int idx, log_first_idx, log_next_idx; + unsigned long long first_idx_sym; + unsigned long log_end_2_6_24; + unsigned log_end_2_6_25; + char *log_buffer = NULL, *log_ptr = NULL; + + /* + * log_end has been changed to "unsigned" since linux-2.6.25. 
+ * 2.6.24 or former: static unsigned long log_end; + * 2.6.25 or later : static unsigned log_end; + */ + if (!open_files_for_creating_dumpfile()) + return FALSE; + + if (!info->flag_refiltering && !info->flag_sadump) { + if (!get_elf_info(info->fd_memory, info->name_memory)) + return FALSE; + } + if (!initial()) + return FALSE; + + if ((SYMBOL(log_buf) == NOT_FOUND_SYMBOL) + || (SYMBOL(log_buf_len) == NOT_FOUND_SYMBOL)) { + ERRMSG("Can't find some symbols for log_buf.\n"); + return FALSE; + } + /* + * kernel 3.5 variable-length record buffer structure + */ + if (SYMBOL(log_end) == NOT_FOUND_SYMBOL) { + if ((SYMBOL(log_first_idx) == NOT_FOUND_SYMBOL) + || (SYMBOL(log_next_idx) == NOT_FOUND_SYMBOL)) { + ERRMSG("Can't find variable-length record symbols"); + return FALSE; + } else { + if (info->flag_partial_dmesg + && SYMBOL(clear_idx) != NOT_FOUND_SYMBOL) + first_idx_sym = SYMBOL(clear_idx); + else + first_idx_sym = SYMBOL(log_first_idx); + + if (!readmem(VADDR, first_idx_sym, &log_first_idx, + sizeof(log_first_idx))) { + ERRMSG("Can't get log_first_idx.\n"); + return FALSE; + } + if (!readmem(VADDR, SYMBOL(log_next_idx), &log_next_idx, + sizeof(log_next_idx))) { + ERRMSG("Can't get log_next_idx.\n"); + return FALSE; + } + } + } + if (!readmem(VADDR, SYMBOL(log_buf), &log_buf, sizeof(log_buf))) { + ERRMSG("Can't get log_buf.\n"); + return FALSE; + } + if (info->kernel_version < KERNEL_VERSION(3, 5, 0)) { + if (info->kernel_version >= KERNEL_VERSION(2, 6, 25)) { + if (!readmem(VADDR, SYMBOL(log_end), &log_end_2_6_25, + sizeof(log_end_2_6_25))) { + ERRMSG("Can't to get log_end.\n"); + return FALSE; + } + log_end = log_end_2_6_25; + } else { + if (!readmem(VADDR, SYMBOL(log_end), &log_end_2_6_24, + sizeof(log_end_2_6_24))) { + ERRMSG("Can't to get log_end.\n"); + return FALSE; + } + log_end = log_end_2_6_24; + } + } else + log_end = 0; + + if (!readmem(VADDR, SYMBOL(log_buf_len), &log_buf_len, + sizeof(log_buf_len))) { + ERRMSG("Can't get log_buf_len.\n"); + return 
FALSE; + } + DEBUG_MSG("\n"); + DEBUG_MSG("log_buf : %lx\n", log_buf); + DEBUG_MSG("log_end : %lx\n", log_end); + DEBUG_MSG("log_buf_len : %d\n", log_buf_len); + if (info->flag_partial_dmesg) + DEBUG_MSG("clear_idx : %u\n", log_first_idx); + else + DEBUG_MSG("log_first_idx : %u\n", log_first_idx); + DEBUG_MSG("log_next_idx : %u\n", log_next_idx); + + if ((log_buffer = malloc(log_buf_len)) == NULL) { + ERRMSG("Can't allocate memory for log_buf. %s\n", + strerror(errno)); + return FALSE; + } + + if (info->kernel_version < KERNEL_VERSION(3, 5, 0)) { + if (log_end < log_buf_len) { + length_log = log_end; + if (!readmem(VADDR, log_buf, log_buffer, length_log)) { + ERRMSG("Can't read dmesg log.\n"); + goto out; + } + } else { + index = log_end & (log_buf_len - 1); + DEBUG_MSG("index : %lx\n", index); + length_log = log_buf_len; + length_oldlog = log_buf_len - index; + if (!readmem(VADDR, log_buf + index, log_buffer, length_oldlog)) { + ERRMSG("Can't read old dmesg log.\n"); + goto out; + } + if (!readmem(VADDR, log_buf, log_buffer + length_oldlog, index)) { + ERRMSG("Can't read new dmesg log.\n"); + goto out; + } + } + DEBUG_MSG("length_log : %d\n", length_log); + + if (!open_dump_file()) { + ERRMSG("Can't open output file.\n"); + goto out; + } + if (write(info->fd_dumpfile, log_buffer, length_log) < 0) + goto out; + + if (!close_files_for_creating_dumpfile()) + goto out; + } else { + if (SIZE(printk_log) == NOT_FOUND_STRUCTURE || + OFFSET(printk_log.len) == NOT_FOUND_STRUCTURE || + OFFSET(printk_log.text_len) == NOT_FOUND_STRUCTURE || + OFFSET(printk_log.ts_nsec) == NOT_FOUND_STRUCTURE) { + ERRMSG("Can't get necessary structures for extracting dmesg log.\n"); + goto out; + } + + if (!readmem(VADDR, log_buf, log_buffer, log_buf_len)) { + ERRMSG("Can't read indexed dmesg log.\n"); + goto out; + } + if (!open_dump_file()) { + ERRMSG("Can't open output file.\n"); + goto out; + } + idx = log_first_idx; + while (idx != log_next_idx) { + log_ptr = log_from_idx(idx, 
log_buffer); + if (!dump_log_entry(log_ptr, info->fd_dumpfile)) + goto out; + idx = log_next(idx, log_buffer); + } + if (!close_files_for_creating_dumpfile()) + goto out; + } + + ret = TRUE; +out: + if (log_buffer) + free(log_buffer); + + return ret; +} + + +int +_exclude_free_page(struct cycle *cycle) +{ + int i, nr_zones, num_nodes, node; + unsigned long node_zones, zone, spanned_pages, pgdat; + struct timeval tv_start; + + if ((node = next_online_node(0)) < 0) { + ERRMSG("Can't get next online node.\n"); + return FALSE; + } + if (!(pgdat = next_online_pgdat(node))) { + ERRMSG("Can't get pgdat list.\n"); + return FALSE; + } + gettimeofday(&tv_start, NULL); + + for (num_nodes = 1; num_nodes <= vt.numnodes; num_nodes++) { + + print_progress(PROGRESS_FREE_PAGES, num_nodes - 1, vt.numnodes, NULL); + + node_zones = pgdat + OFFSET(pglist_data.node_zones); + + if (!readmem(VADDR, pgdat + OFFSET(pglist_data.nr_zones), + &nr_zones, sizeof(nr_zones))) { + ERRMSG("Can't get nr_zones.\n"); + return FALSE; + } + + for (i = 0; i < nr_zones; i++) { + + print_progress(PROGRESS_FREE_PAGES, i + nr_zones * (num_nodes - 1), + nr_zones * vt.numnodes, NULL); + + zone = node_zones + (i * SIZE(zone)); + if (!readmem(VADDR, zone + OFFSET(zone.spanned_pages), + &spanned_pages, sizeof spanned_pages)) { + ERRMSG("Can't get spanned_pages.\n"); + return FALSE; + } + if (!spanned_pages) + continue; + if (!reset_bitmap_of_free_pages(zone, cycle)) + return FALSE; + } + if (num_nodes < vt.numnodes) { + if ((node = next_online_node(node + 1)) < 0) { + ERRMSG("Can't get next online node.\n"); + return FALSE; + } else if (!(pgdat = next_online_pgdat(node))) { + ERRMSG("Can't determine pgdat list (node %d).\n", + node); + return FALSE; + } + } + } + + /* + * print [100 %] + */ + print_progress(PROGRESS_FREE_PAGES, vt.numnodes, vt.numnodes, NULL); + print_execution_time(PROGRESS_FREE_PAGES, &tv_start); + + return TRUE; +} + +int +exclude_free_page(struct cycle *cycle) +{ + /* + * Check having 
necessary information. + */ + if ((SYMBOL(node_data) == NOT_FOUND_SYMBOL) + && (SYMBOL(pgdat_list) == NOT_FOUND_SYMBOL) + && (SYMBOL(contig_page_data) == NOT_FOUND_SYMBOL)) { + ERRMSG("Can't get necessary symbols for excluding free pages.\n"); + return FALSE; + } + if ((SIZE(zone) == NOT_FOUND_STRUCTURE) + || ((OFFSET(zone.free_pages) == NOT_FOUND_STRUCTURE) + && (OFFSET(zone.vm_stat) == NOT_FOUND_STRUCTURE)) + || (OFFSET(zone.free_area) == NOT_FOUND_STRUCTURE) + || (OFFSET(zone.spanned_pages) == NOT_FOUND_STRUCTURE) + || (OFFSET(pglist_data.node_zones) == NOT_FOUND_STRUCTURE) + || (OFFSET(pglist_data.nr_zones) == NOT_FOUND_STRUCTURE) + || (SIZE(free_area) == NOT_FOUND_STRUCTURE) + || (OFFSET(free_area.free_list) == NOT_FOUND_STRUCTURE) + || (OFFSET(list_head.next) == NOT_FOUND_STRUCTURE) + || (OFFSET(list_head.prev) == NOT_FOUND_STRUCTURE) + || (OFFSET(page.lru) == NOT_FOUND_STRUCTURE) + || (ARRAY_LENGTH(zone.free_area) == NOT_FOUND_STRUCTURE)) { + ERRMSG("Can't get necessary structures for excluding free pages.\n"); + return FALSE; + } + if (is_xen_memory() && !info->dom0_mapnr) { + ERRMSG("Can't get max domain-0 PFN for excluding free pages.\n"); + return FALSE; + } + + /* + * Detect free pages and update 2nd-bitmap. + */ + if (!_exclude_free_page(cycle)) + return FALSE; + + return TRUE; +} + +/* + * For the kernel versions from v2.6.17 to v2.6.37. + */ +static int +page_is_buddy_v2(unsigned long flags, unsigned int _mapcount, + unsigned long private, unsigned int _count) +{ + if (flags & (1UL << NUMBER(PG_buddy))) + return TRUE; + + return FALSE; +} + +/* + * For v2.6.38 and later kernel versions. 
+ */ +static int +page_is_buddy_v3(unsigned long flags, unsigned int _mapcount, + unsigned long private, unsigned int _count) +{ + if (flags & (1UL << NUMBER(PG_slab))) + return FALSE; + + if (_mapcount == (int)NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE)) + return TRUE; + + return FALSE; +} + +static void +setup_page_is_buddy(void) +{ + if (OFFSET(page.private) == NOT_FOUND_STRUCTURE) + goto out; + + if (NUMBER(PG_buddy) == NOT_FOUND_NUMBER) { + if (NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE) != NOT_FOUND_NUMBER) { + if (OFFSET(page._mapcount) != NOT_FOUND_STRUCTURE) + info->page_is_buddy = page_is_buddy_v3; + } + } else + info->page_is_buddy = page_is_buddy_v2; + +out: + if (!info->page_is_buddy) + DEBUG_MSG("Can't select page_is_buddy handler; " + "follow free lists instead of mem_map array.\n"); +} + +/* + * If using a dumpfile in kdump-compressed format as a source file + * instead of /proc/vmcore, 1st-bitmap of a new dumpfile must be + * the same as the one of a source file. + */ +int +copy_1st_bitmap_from_memory(void) +{ + char buf[info->dh_memory->block_size]; + off_t offset_page; + off_t bitmap_offset; + struct disk_dump_header *dh = info->dh_memory; + + bitmap_offset = (DISKDUMP_HEADER_BLOCKS + dh->sub_hdr_size) + * dh->block_size; + + if (lseek(info->fd_memory, bitmap_offset, SEEK_SET) < 0) { + ERRMSG("Can't seek %s. %s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + if (lseek(info->bitmap1->fd, info->bitmap1->offset, SEEK_SET) < 0) { + ERRMSG("Can't seek the bitmap(%s). %s\n", + info->bitmap1->file_name, strerror(errno)); + return FALSE; + } + offset_page = 0; + while (offset_page < (info->len_bitmap / 2)) { + if (read(info->fd_memory, buf, sizeof(buf)) != sizeof(buf)) { + ERRMSG("Can't read %s. %s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + if (write(info->bitmap1->fd, buf, sizeof(buf)) != sizeof(buf)) { + ERRMSG("Can't write the bitmap(%s). 
%s\n", + info->bitmap1->file_name, strerror(errno)); + return FALSE; + } + offset_page += sizeof(buf); + } + return TRUE; +} + +int +create_1st_bitmap_file(void) +{ + int i; + unsigned int num_pt_loads = get_num_pt_loads(); + char buf[info->page_size]; + mdf_pfn_t pfn, pfn_start, pfn_end, pfn_bitmap1; + unsigned long long phys_start, phys_end; + struct timeval tv_start; + off_t offset_page; + + if (info->flag_refiltering) + return copy_1st_bitmap_from_memory(); + + if (info->flag_sadump) + return sadump_copy_1st_bitmap_from_memory(); + + /* + * At first, clear all the bits on the 1st-bitmap. + */ + memset(buf, 0, sizeof(buf)); + + if (lseek(info->bitmap1->fd, info->bitmap1->offset, SEEK_SET) < 0) { + ERRMSG("Can't seek the bitmap(%s). %s\n", + info->bitmap1->file_name, strerror(errno)); + return FALSE; + } + offset_page = 0; + while (offset_page < (info->len_bitmap / 2)) { + if (write(info->bitmap1->fd, buf, info->page_size) + != info->page_size) { + ERRMSG("Can't write the bitmap(%s). %s\n", + info->bitmap1->file_name, strerror(errno)); + return FALSE; + } + offset_page += info->page_size; + } + + gettimeofday(&tv_start, NULL); + + /* + * If page is on memory hole, set bit on the 1st-bitmap. 
+ */ + pfn_bitmap1 = 0; + for (i = 0; get_pt_load(i, &phys_start, &phys_end, NULL, NULL); i++) { + + if (!info->flag_mem_usage) + print_progress(PROGRESS_HOLES, i, num_pt_loads, NULL); + + pfn_start = paddr_to_pfn(phys_start); + pfn_end = paddr_to_pfn(phys_end); + if (pfn_start > info->max_mapnr) + continue; + pfn_end = MIN(pfn_end, info->max_mapnr); + /* Account for last page if it has less than page_size data in it */ + if (phys_end & (info->page_size - 1)) + ++pfn_end; + + for (pfn = pfn_start; pfn < pfn_end; pfn++) { + set_bit_on_1st_bitmap(pfn, NULL); + pfn_bitmap1++; + } + } + pfn_memhole = info->max_mapnr - pfn_bitmap1; + + /* + * print 100 % + */ + if (!info->flag_mem_usage) { + print_progress(PROGRESS_HOLES, info->max_mapnr, info->max_mapnr, NULL); + print_execution_time(PROGRESS_HOLES, &tv_start); + } + + if (!sync_1st_bitmap()) + return FALSE; + + return TRUE; +} + +int +create_bitmap_from_memhole(struct cycle *cycle, struct dump_bitmap *bitmap, int count_memhole, + int (*set_bit)(mdf_pfn_t pfn, struct cycle *cycle)); + +int +create_1st_bitmap_buffer(struct cycle *cycle) +{ + return create_bitmap_from_memhole(cycle, info->bitmap1, TRUE, + set_bit_on_1st_bitmap); +} + +int +create_1st_bitmap(struct cycle *cycle) +{ + if (info->bitmap1->fd >= 0) { + return create_1st_bitmap_file(); + } else { + return create_1st_bitmap_buffer(cycle); + } +} + +static inline int +is_in_segs(unsigned long long paddr) +{ + if (info->flag_refiltering || info->flag_sadump) { + if (info->bitmap1->fd < 0) { + initialize_1st_bitmap(info->bitmap1); + create_1st_bitmap_file(); + } + + return is_dumpable(info->bitmap1, paddr_to_pfn(paddr), NULL); + } + + if (paddr_to_offset(paddr)) + return TRUE; + else + return FALSE; +} + +/* + * Exclude the page filled with zero in case of creating an elf dumpfile. 
+ */ +int +exclude_zero_pages_cyclic(struct cycle *cycle) +{ + mdf_pfn_t pfn; + unsigned long long paddr; + unsigned char buf[info->page_size]; + + for (pfn = cycle->start_pfn, paddr = pfn_to_paddr(pfn); pfn < cycle->end_pfn; + pfn++, paddr += info->page_size) { + + if (!is_in_segs(paddr)) + continue; + + if (!sync_2nd_bitmap()) + return FALSE; + + if (!is_dumpable(info->bitmap2, pfn, cycle)) + continue; + + if (!readmem(PADDR, paddr, buf, info->page_size)) { + ERRMSG("Can't get the page data(pfn:%llx, max_mapnr:%llx).\n", + pfn, info->max_mapnr); + return FALSE; + } + if (is_zero_page(buf, info->page_size)) { + if (clear_bit_on_2nd_bitmap(pfn, cycle)) + pfn_zero++; + } + } + + return TRUE; +} + +int +initialize_2nd_bitmap_cyclic(struct cycle *cycle) +{ + return create_bitmap_from_memhole(cycle, info->bitmap2, FALSE, + set_bit_on_2nd_bitmap_for_kernel); +} + +int +create_bitmap_from_memhole(struct cycle *cycle, struct dump_bitmap *bitmap, int count_memhole, + int (*set_bit)(mdf_pfn_t pfn, struct cycle *cycle)) +{ + int i; + mdf_pfn_t pfn; + unsigned long long phys_start, phys_end; + mdf_pfn_t pfn_start, pfn_end; + mdf_pfn_t pfn_start_roundup, pfn_end_round; + unsigned long pfn_start_byte, pfn_end_byte; + unsigned int num_pt_loads = get_num_pt_loads(); + struct timeval tv_start; + + /* + * At first, clear all the bits on the bitmap. + */ + initialize_bitmap(bitmap); + + /* + * If page is on memory hole, set bit on the bitmap. 
+ */ + gettimeofday(&tv_start, NULL); + for (i = 0; get_pt_load(i, &phys_start, &phys_end, NULL, NULL); i++) { + pfn_start = MAX(paddr_to_pfn(phys_start), cycle->start_pfn); + pfn_end = MIN(paddr_to_pfn(phys_end), cycle->end_pfn); + + print_progress(PROGRESS_HOLES, i, num_pt_loads, NULL); + + if (pfn_start >= pfn_end) + continue; + + pfn_start_roundup = MIN(roundup(pfn_start, BITPERBYTE), + pfn_end); + pfn_end_round = MAX(round(pfn_end, BITPERBYTE), pfn_start); + + for (pfn = pfn_start; pfn < pfn_start_roundup; ++pfn) { + if (!set_bit(pfn, cycle)) + return FALSE; + if (count_memhole) + pfn_memhole--; + } + + pfn_start_byte = (pfn_start_roundup - cycle->start_pfn) >> 3; + pfn_end_byte = (pfn_end_round - cycle->start_pfn) >> 3; + + if (pfn_start_byte < pfn_end_byte) { + memset(bitmap->buf + pfn_start_byte, + 0xff, + pfn_end_byte - pfn_start_byte); + if (count_memhole) + pfn_memhole -= (pfn_end_byte - pfn_start_byte) << 3; + } + + if (pfn_end_round >= pfn_start) { + for (pfn = pfn_end_round; pfn < pfn_end; ++pfn) { + if (!set_bit(pfn, cycle)) + return FALSE; + if (count_memhole) + pfn_memhole--; + } + } + } + /* + * print 100 % + */ + print_progress(PROGRESS_HOLES, info->max_mapnr, info->max_mapnr, NULL); + print_execution_time(PROGRESS_HOLES, &tv_start); + + return TRUE; +} + +static void +exclude_range(mdf_pfn_t *counter, mdf_pfn_t pfn, mdf_pfn_t endpfn, + struct cycle *cycle) +{ + if (cycle) { + cycle->exclude_pfn_start = cycle->end_pfn; + cycle->exclude_pfn_end = endpfn; + cycle->exclude_pfn_counter = counter; + + if (cycle->end_pfn < endpfn) + endpfn = cycle->end_pfn; + } + + while (pfn < endpfn) { + if (clear_bit_on_2nd_bitmap_for_kernel(pfn, cycle)) + (*counter)++; + ++pfn; + } +} + +int +__exclude_unnecessary_pages(unsigned long mem_map, + mdf_pfn_t pfn_start, mdf_pfn_t pfn_end, struct cycle *cycle) +{ + mdf_pfn_t pfn; + mdf_pfn_t *pfn_counter; + mdf_pfn_t nr_pages; + unsigned long index_pg, pfn_mm; + unsigned long long maddr; + mdf_pfn_t pfn_read_start, 
pfn_read_end; + unsigned char page_cache[SIZE(page) * PGMM_CACHED]; + unsigned char *pcache; + unsigned int _count, _mapcount = 0, compound_order = 0; + unsigned int order_offset, dtor_offset; + unsigned long flags, mapping, private = 0; + unsigned long compound_dtor, compound_head = 0; + + /* + * If a multi-page exclusion is pending, do it first + */ + if (cycle && cycle->exclude_pfn_start < cycle->exclude_pfn_end) { + exclude_range(cycle->exclude_pfn_counter, + cycle->exclude_pfn_start, cycle->exclude_pfn_end, + cycle); + + mem_map += (cycle->exclude_pfn_end - pfn_start) * SIZE(page); + pfn_start = cycle->exclude_pfn_end; + } + + /* + * Refresh the buffer of struct page, when changing mem_map. + */ + pfn_read_start = ULONGLONG_MAX; + pfn_read_end = 0; + + for (pfn = pfn_start; pfn < pfn_end; pfn++, mem_map += SIZE(page)) { + + /* + * If this pfn doesn't belong to target region, skip this pfn. + */ + if (info->flag_cyclic && !is_cyclic_region(pfn, cycle)) + continue; + + /* + * Exclude the memory hole. 
+ */ + if (is_xen_memory()) { + maddr = ptom_xen(pfn_to_paddr(pfn)); + if (maddr == NOT_PADDR) { + ERRMSG("Can't convert a physical address(%llx) to machine address.\n", + pfn_to_paddr(pfn)); + return FALSE; + } + if (!is_in_segs(maddr)) + continue; + } else { + if (!is_in_segs(pfn_to_paddr(pfn))) + continue; + } + + index_pg = pfn % PGMM_CACHED; + if (pfn < pfn_read_start || pfn_read_end < pfn) { + if (roundup(pfn + 1, PGMM_CACHED) < pfn_end) + pfn_mm = PGMM_CACHED - index_pg; + else + pfn_mm = pfn_end - pfn; + + if (!readmem(VADDR, mem_map, + page_cache + (index_pg * SIZE(page)), + SIZE(page) * pfn_mm)) { + ERRMSG("Can't read the buffer of struct page.\n"); + return FALSE; + } + pfn_read_start = pfn; + pfn_read_end = pfn + pfn_mm - 1; + } + pcache = page_cache + (index_pg * SIZE(page)); + + flags = ULONG(pcache + OFFSET(page.flags)); + _count = UINT(pcache + OFFSET(page._refcount)); + mapping = ULONG(pcache + OFFSET(page.mapping)); + + if (OFFSET(page.compound_order) != NOT_FOUND_STRUCTURE) { + order_offset = OFFSET(page.compound_order); + } else { + if (info->kernel_version < KERNEL_VERSION(4, 4, 0)) + order_offset = OFFSET(page.lru) + OFFSET(list_head.prev); + else + order_offset = 0; + } + + if (OFFSET(page.compound_dtor) != NOT_FOUND_STRUCTURE) { + dtor_offset = OFFSET(page.compound_dtor); + } else { + if (info->kernel_version < KERNEL_VERSION(4, 4, 0)) + dtor_offset = OFFSET(page.lru) + OFFSET(list_head.next); + else + dtor_offset = 0; + } + + compound_order = 0; + compound_dtor = 0; + /* + * The last pfn of the mem_map cache must not be compound head + * page since all compound pages are aligned to its page order + * and PGMM_CACHED is a power of 2. 
+ */ + if ((index_pg < PGMM_CACHED - 1) && isCompoundHead(flags)) { + unsigned char *addr = pcache + SIZE(page); + + if (order_offset) { + if (info->kernel_version >= + KERNEL_VERSION(4, 16, 0)) { + compound_order = + UCHAR(addr + order_offset); + } else { + compound_order = + USHORT(addr + order_offset); + } + } + + if (dtor_offset) { + /* + * compound_dtor has been changed from the address of descriptor + * to the ID of it since linux-4.4. + */ + if (info->kernel_version >= + KERNEL_VERSION(4, 16, 0)) { + compound_dtor = + UCHAR(addr + dtor_offset); + } else if (info->kernel_version >= + KERNEL_VERSION(4, 4, 0)) { + compound_dtor = + USHORT(addr + dtor_offset); + } else { + compound_dtor = + ULONG(addr + dtor_offset); + } + } + + if ((compound_order >= sizeof(unsigned long) * 8) + || ((pfn & ((1UL << compound_order) - 1)) != 0)) { + /* Invalid order */ + compound_order = 0; + } + } + if (OFFSET(page.compound_head) != NOT_FOUND_STRUCTURE) + compound_head = ULONG(pcache + OFFSET(page.compound_head)); + + if (OFFSET(page._mapcount) != NOT_FOUND_STRUCTURE) + _mapcount = UINT(pcache + OFFSET(page._mapcount)); + if (OFFSET(page.private) != NOT_FOUND_STRUCTURE) + private = ULONG(pcache + OFFSET(page.private)); + + nr_pages = 1 << compound_order; + pfn_counter = NULL; + + /* + * Excludable compound tail pages must have already been excluded by + * exclude_range(), don't need to check them here. + */ + if (compound_head & 1) { + continue; + } + /* + * Exclude the free page managed by a buddy + * Use buddy identification of free pages whether cyclic or not. + */ + else if ((info->dump_level & DL_EXCLUDE_FREE) + && info->page_is_buddy + && info->page_is_buddy(flags, _mapcount, private, _count)) { + nr_pages = 1 << private; + pfn_counter = &pfn_free; + } + /* + * Exclude the non-private cache page. 
+ */ + else if ((info->dump_level & DL_EXCLUDE_CACHE) + && is_cache_page(flags) + && !isPrivate(flags) && !isAnon(mapping)) { + pfn_counter = &pfn_cache; + } + /* + * Exclude the cache page whether private or non-private. + */ + else if ((info->dump_level & DL_EXCLUDE_CACHE_PRI) + && is_cache_page(flags) + && !isAnon(mapping)) { + if (isPrivate(flags)) + pfn_counter = &pfn_cache_private; + else + pfn_counter = &pfn_cache; + } + /* + * Exclude the data page of the user process. + * - anonymous pages + * - hugetlbfs pages + */ + else if ((info->dump_level & DL_EXCLUDE_USER_DATA) + && (isAnon(mapping) || isHugetlb(compound_dtor))) { + pfn_counter = &pfn_user; + } + /* + * Exclude the hwpoison page. + */ + else if (isHWPOISON(flags)) { + pfn_counter = &pfn_hwpoison; + } + /* + * Unexcludable page + */ + else + continue; + + /* + * Execute exclusion + */ + if (nr_pages == 1) { + if (clear_bit_on_2nd_bitmap_for_kernel(pfn, cycle)) + (*pfn_counter)++; + } else { + exclude_range(pfn_counter, pfn, pfn + nr_pages, cycle); + pfn += nr_pages - 1; + mem_map += (nr_pages - 1) * SIZE(page); + } + } + return TRUE; +} + +int +exclude_unnecessary_pages(struct cycle *cycle) +{ + unsigned int mm; + struct mem_map_data *mmd; + struct timeval tv_start; + + if (is_xen_memory() && !info->dom0_mapnr) { + ERRMSG("Can't get max domain-0 PFN for excluding pages.\n"); + return FALSE; + } + + gettimeofday(&tv_start, NULL); + + for (mm = 0; mm < info->num_mem_map; mm++) { + + if (!info->flag_mem_usage) + print_progress(PROGRESS_UNN_PAGES, mm, info->num_mem_map, NULL); + + mmd = &info->mem_map_data[mm]; + + if (mmd->mem_map == NOT_MEMMAP_ADDR) + continue; + + if (mmd->pfn_end >= cycle->start_pfn && + mmd->pfn_start <= cycle->end_pfn) { + if (!__exclude_unnecessary_pages(mmd->mem_map, + mmd->pfn_start, mmd->pfn_end, cycle)) + return FALSE; + } + } + /* + * print [100 %] + */ + if (!info->flag_mem_usage) { + print_progress(PROGRESS_UNN_PAGES, info->num_mem_map, info->num_mem_map, NULL); + 
print_execution_time(PROGRESS_UNN_PAGES, &tv_start); + } + + return TRUE; +} + +int +copy_bitmap_buffer(void) +{ + memcpy(info->bitmap2->buf, info->bitmap1->buf, + info->bufsize_cyclic); + return TRUE; +} + +int +copy_bitmap_file(void) +{ + off_t offset; + unsigned char buf[info->page_size]; + const off_t failed = (off_t)-1; + + offset = 0; + while (offset < (info->len_bitmap / 2)) { + if (lseek(info->bitmap1->fd, info->bitmap1->offset + offset, + SEEK_SET) == failed) { + ERRMSG("Can't seek the bitmap(%s). %s\n", + info->name_bitmap, strerror(errno)); + return FALSE; + } + if (read(info->bitmap1->fd, buf, sizeof(buf)) != sizeof(buf)) { + ERRMSG("Can't read the dump memory(%s). %s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + if (lseek(info->bitmap2->fd, info->bitmap2->offset + offset, + SEEK_SET) == failed) { + ERRMSG("Can't seek the bitmap(%s). %s\n", + info->name_bitmap, strerror(errno)); + return FALSE; + } + if (write(info->bitmap2->fd, buf, sizeof(buf)) != sizeof(buf)) { + ERRMSG("Can't write the bitmap(%s). %s\n", + info->name_bitmap, strerror(errno)); + return FALSE; + } + offset += sizeof(buf); + } + + return TRUE; +} + +int +copy_bitmap(void) +{ + if (info->fd_bitmap >= 0) { + return copy_bitmap_file(); + } else { + return copy_bitmap_buffer(); + } +} + +/* + * Initialize the structure for saving pfn's to be deleted. 
+ */ +int +init_save_control() +{ + int flags; + char *filename; + + filename = malloc(50); + *filename = '\0'; + strcpy(filename, info->working_dir); + strcat(filename, "/"); + strcat(filename, "makedumpfilepfns"); + sc.sc_filename = filename; + flags = O_RDWR|O_CREAT|O_TRUNC; + if ((sc.sc_fd = open(sc.sc_filename, flags, S_IRUSR|S_IWUSR)) < 0) { + ERRMSG("Can't open the pfn file %s.\n", sc.sc_filename); + return FAILED; + } + unlink(sc.sc_filename); + + sc.sc_buf = malloc(info->page_size); + if (!sc.sc_buf) { + ERRMSG("Can't allocate a page for pfn buf.\n"); + return FAILED; + } + sc.sc_buflen = info->page_size; + sc.sc_bufposition = 0; + sc.sc_fileposition = 0; + sc.sc_filelen = 0; + return COMPLETED; +} + +/* + * Save a starting pfn and number of pfns for later delete from bitmap. + */ +int +save_deletes(unsigned long startpfn, unsigned long numpfns) +{ + int i; + struct sc_entry *scp; + + if (sc.sc_bufposition == sc.sc_buflen) { + i = write(sc.sc_fd, sc.sc_buf, sc.sc_buflen); + if (i != sc.sc_buflen) { + ERRMSG("save: Can't write a page to %s\n", + sc.sc_filename); + return FAILED; + } + sc.sc_filelen += sc.sc_buflen; + sc.sc_bufposition = 0; + } + scp = (struct sc_entry *)(sc.sc_buf + sc.sc_bufposition); + scp->startpfn = startpfn; + scp->numpfns = numpfns; + sc.sc_bufposition += sizeof(struct sc_entry); + return COMPLETED; +} + +/* + * Get a starting pfn and number of pfns for delete from bitmap. 
 * Returns COMPLETED when an entry was produced, FAILED when there are
 * no more entries (or a read error occurred).
 */
int
get_deletes(unsigned long *startpfn, unsigned long *numpfns)
{
	int i;
	struct sc_entry *scp;

	/* All recorded entries have been consumed. */
	if (sc.sc_fileposition >= sc.sc_filelen) {
		return FAILED;
	}

	/* Buffer exhausted: refill one page from the scratch file. */
	if (sc.sc_bufposition == sc.sc_buflen) {
		i = read(sc.sc_fd, sc.sc_buf, sc.sc_buflen);
		if (i <= 0) {
			ERRMSG("Can't read a page from %s.\n", sc.sc_filename);
			return FAILED;
		}
		sc.sc_bufposition = 0;
	}
	scp = (struct sc_entry *)(sc.sc_buf + sc.sc_bufposition);
	*startpfn = scp->startpfn;
	*numpfns = scp->numpfns;
	sc.sc_bufposition += sizeof(struct sc_entry);
	sc.sc_fileposition += sizeof(struct sc_entry);
	return COMPLETED;
}

/*
 * Given a range of unused pfn's, check whether we can drop the vmemmap pages
 * that represent them.
 * (pfn ranges are literally start and end, not start and end+1)
 * see the array of vmemmap pfns and the pfns they represent: gvmem_pfns
 * Return COMPLETED for delete, FAILED for not to delete.
 */
int
find_vmemmap_pages(unsigned long startpfn, unsigned long endpfn, unsigned long *vmappfn,
		   unsigned long *nmapnpfns)
{
	int i;
	long npfns_offset, vmemmap_offset, vmemmap_pfns, start_vmemmap_pfn;
	long npages, end_vmemmap_pfn;
	struct vmap_pfns *vmapp;
	int pagesize = info->page_size;

	/* Find the vmemmap region whose represented pfn range covers ours. */
	for (i = 0; i < nr_gvmem_pfns; i++) {
		vmapp = gvmem_pfns + i;
		if ((startpfn >= vmapp->rep_pfn_start) &&
		    (endpfn <= vmapp->rep_pfn_end)) {
			/*
			 * Only whole vmemmap pages can be dropped: round the
			 * start offset up and the end offset down to a page
			 * boundary within the vmemmap region.
			 */
			npfns_offset = startpfn - vmapp->rep_pfn_start;
			vmemmap_offset = npfns_offset * size_table.page;
			// round up to a page boundary
			if (vmemmap_offset % pagesize)
				vmemmap_offset += (pagesize - (vmemmap_offset % pagesize));
			vmemmap_pfns = vmemmap_offset / pagesize;
			start_vmemmap_pfn = vmapp->vmap_pfn_start + vmemmap_pfns;
			*vmappfn = start_vmemmap_pfn;

			npfns_offset = endpfn - vmapp->rep_pfn_start;
			vmemmap_offset = npfns_offset * size_table.page;
			// round down to page boundary
			vmemmap_offset -= (vmemmap_offset % pagesize);
			vmemmap_pfns = vmemmap_offset / pagesize;
			end_vmemmap_pfn = vmapp->vmap_pfn_start + vmemmap_pfns;
			npages = end_vmemmap_pfn - start_vmemmap_pfn;
			if (npages == 0)
				return FAILED;
			*nmapnpfns = npages;
			return COMPLETED;
		}
	}
	return FAILED;
}

/*
 * Find the big holes in bitmap2; they represent ranges for which
 * we do not need page structures.
 * Bitmap1 is a map of dumpable (i.e existing) pages.
 * They must only be pages that exist, so they will be 0 bits
 * in the 2nd bitmap but 1 bits in the 1st bitmap.
 * For speed, only worry about whole words full of bits.
 */
int
find_unused_vmemmap_pages(void)
{
	struct dump_bitmap *bitmap1 = info->bitmap1;
	struct dump_bitmap *bitmap2 = info->bitmap2;
	unsigned long long pfn;
	unsigned long *lp1, *lp2, startpfn, endpfn;
	unsigned long vmapstartpfn, vmapnumpfns;
	int i, sz, numpages=0;
	int startword, numwords, do_break=0;
	long deleted_pages = 0;
	off_t new_offset1, new_offset2;

	/* read each block of both bitmaps */
	for (pfn = 0; pfn < info->max_mapnr; pfn += PFN_BUFBITMAP) { /* size in bits */
		numpages++;
		new_offset1 = bitmap1->offset + BUFSIZE_BITMAP * (pfn / PFN_BUFBITMAP);
		if (lseek(bitmap1->fd, new_offset1, SEEK_SET) < 0 ) {
			ERRMSG("Can't seek the bitmap(%s). %s\n",
				bitmap1->file_name, strerror(errno));
			return FAILED;
		}
		if (read(bitmap1->fd, bitmap1->buf, BUFSIZE_BITMAP) != BUFSIZE_BITMAP) {
			ERRMSG("Can't read the bitmap(%s). %s\n",
				bitmap1->file_name, strerror(errno));
			return FAILED;
		}
		bitmap1->no_block = pfn / PFN_BUFBITMAP;

		new_offset2 = bitmap2->offset + BUFSIZE_BITMAP * (pfn / PFN_BUFBITMAP);
		if (lseek(bitmap2->fd, new_offset2, SEEK_SET) < 0 ) {
			ERRMSG("Can't seek the bitmap(%s). %s\n",
				bitmap2->file_name, strerror(errno));
			return FAILED;
		}
		if (read(bitmap2->fd, bitmap2->buf, BUFSIZE_BITMAP) != BUFSIZE_BITMAP) {
			ERRMSG("Can't read the bitmap(%s). %s\n",
				bitmap2->file_name, strerror(errno));
			return FAILED;
		}
		bitmap2->no_block = pfn / PFN_BUFBITMAP;

		/* process this one page of both bitmaps at a time */
		lp1 = (unsigned long *)bitmap1->buf;
		lp2 = (unsigned long *)bitmap2->buf;
		/* sz is words in the block */
		sz = BUFSIZE_BITMAP / sizeof(unsigned long);
		startword = -1;
		for (i = 0; i < sz; i++, lp1++, lp2++) {
			/* for each whole word in the block */
			/* deal in full 64-page chunks only */
			if (*lp1 == 0xffffffffffffffffULL) {
				if (*lp2 == 0) {
					/* we are in a series we want */
					if (startword == -1) {
						/* starting a new group */
						startword = i;
					}
				} else {
					/* we hit a used page */
					if (startword >= 0)
						do_break = 1;
				}
			} else {
				/* we hit a hole in real memory, or part of one */
				if (startword >= 0)
					do_break = 1;
			}
			if (do_break) {
				do_break = 0;
				if (startword >= 0) {
					numwords = i - startword;
					/* 64 bits represents 64 page structs, which
					   are not even one page of them (takes
					   at least 73) */
					if (numwords > 1) {
						startpfn = pfn +
							(startword * BITS_PER_WORD);
						/* pfn ranges are literally start and end,
						   not start and end + 1 */
						endpfn = startpfn +
							(numwords * BITS_PER_WORD) - 1;
						if (find_vmemmap_pages(startpfn, endpfn,
							&vmapstartpfn, &vmapnumpfns) ==
							COMPLETED) {
							if (save_deletes(vmapstartpfn,
								vmapnumpfns) == FAILED) {
								ERRMSG("save_deletes failed\n");
								return FAILED;
							}
							deleted_pages += vmapnumpfns;
						}
					}
				}
				startword = -1;
			}
		}
		/* Flush a group still open at the end of the block. */
		if (startword >= 0) {
			numwords = i - startword;
			if (numwords > 1) {
				startpfn = pfn + (startword * BITS_PER_WORD);
				/* pfn ranges are literally start and end,
				   not start and end + 1 */
				endpfn = startpfn + (numwords * BITS_PER_WORD) - 1;
				if (find_vmemmap_pages(startpfn, endpfn,
					&vmapstartpfn, &vmapnumpfns) == COMPLETED) {
					if (save_deletes(vmapstartpfn, vmapnumpfns)
						== FAILED) {
						ERRMSG("save_deletes failed\n");
						return FAILED;
					}
					deleted_pages += vmapnumpfns;
				}
			}
		}
	}

	PROGRESS_MSG("\nExcluded %ld unused vmemmap pages\n", deleted_pages);

	return COMPLETED;
}

/*
 * Retrieve the list of pfn's and delete them from bitmap2;
 */
void
delete_unused_vmemmap_pages(void)
{
	unsigned long startpfn, numpfns, pfn, i;

	while (get_deletes(&startpfn, &numpfns) == COMPLETED) {
		for (i = 0, pfn = startpfn; i < numpfns; i++, pfn++) {
			clear_bit_on_2nd_bitmap_for_kernel(pfn, (struct cycle *)0);
			// note that this is never to be used in cyclic mode!
		}
	}
	return;
}

/*
 * Finalize the structure for saving pfn's to be deleted.
 */
void
finalize_save_control()
{
	free(sc.sc_buf);
	close(sc.sc_fd);
	return;
}

/*
 * Reset the structure for saving pfn's to be deleted so that it can be read
 */
int
reset_save_control()
{
	int i;
	if (sc.sc_bufposition == 0)
		return COMPLETED;

	/* Flush the partially filled last page before rewinding. */
	i = write(sc.sc_fd, sc.sc_buf, sc.sc_buflen);
	if (i != sc.sc_buflen) {
		ERRMSG("reset: Can't write a page to %s\n",
			sc.sc_filename);
		return FAILED;
	}
	/* Only sc_bufposition bytes of the flushed page carry entries. */
	sc.sc_filelen += sc.sc_bufposition;

	if (lseek(sc.sc_fd, 0, SEEK_SET) < 0) {
		ERRMSG("Can't seek the pfn file %s).", sc.sc_filename);
		return FAILED;
	}
	sc.sc_fileposition = 0;
	sc.sc_bufposition = sc.sc_buflen;	/* trigger 1st read */
	return COMPLETED;
}

/*
 * Build the 2nd bitmap (dumpable pages) for the given cycle by starting
 * from the 1st bitmap (existing pages) and clearing everything that the
 * dump level says should be excluded.
 */
int
create_2nd_bitmap(struct cycle *cycle)
{
	/*
	 * At first, clear all the bits on memory hole.
	 */
	if (info->flag_cyclic) {
		/* Have to do it from scratch. */
		initialize_2nd_bitmap_cyclic(cycle);
	} else {
		/* Can copy 1st-bitmap to 2nd-bitmap. */
		if (!copy_bitmap()) {
			ERRMSG("Can't copy 1st-bitmap to 2nd-bitmap.\n");
			return FALSE;
		}
	}

	/*
	 * If re-filtering ELF dump, exclude pages that were already
	 * excluded in the original file.
	 */
	exclude_nodata_pages(cycle);

	/*
	 * Exclude cache pages, cache private pages, user data pages,
	 * and hwpoison pages.
	 */
	if (info->dump_level & DL_EXCLUDE_CACHE ||
	    info->dump_level & DL_EXCLUDE_CACHE_PRI ||
	    info->dump_level & DL_EXCLUDE_USER_DATA ||
	    NUMBER(PG_hwpoison) != NOT_FOUND_NUMBER ||
	    ((info->dump_level & DL_EXCLUDE_FREE) && info->page_is_buddy)) {
		if (!exclude_unnecessary_pages(cycle)) {
			ERRMSG("Can't exclude unnecessary pages.\n");
			return FALSE;
		}
	}

	/*
	 * Exclude free pages.
	 * (Only via the free-list walk; buddy-flag based exclusion was
	 * already handled by exclude_unnecessary_pages() above.)
	 */
	if ((info->dump_level & DL_EXCLUDE_FREE) && !info->page_is_buddy)
		if (!exclude_free_page(cycle))
			return FALSE;

	/*
	 * Exclude Xen user domain.
	 */
	if (info->flag_exclude_xen_dom) {
		if (!exclude_xen_user_domain()) {
			ERRMSG("Can't exclude xen user domain.\n");
			return FALSE;
		}
	}

	/*
	 * Exclude pages filled with zero for creating an ELF dumpfile.
	 *
	 * Note: If creating a kdump-compressed dumpfile, makedumpfile
	 *	 checks zero-pages while copying dumpable pages to a
	 *	 dumpfile from /proc/vmcore. That is valuable for the
	 *	 speed, because each page is read one time only.
	 *	 Otherwise (if creating an ELF dumpfile), makedumpfile
	 *	 should check zero-pages at this time because 2nd-bitmap
	 *	 should be fixed for creating an ELF header. That is slow
	 *	 due to reading each page two times, but it is necessary.
	 */
	if ((info->dump_level & DL_EXCLUDE_ZERO) &&
	    (info->flag_elf_dumpfile || info->flag_mem_usage)) {
		/*
		 * 2nd-bitmap should be flushed at this time, because
		 * exclude_zero_pages() checks 2nd-bitmap.
+ */ + if (!sync_2nd_bitmap()) + return FALSE; + + if (!exclude_zero_pages_cyclic(cycle)) { + ERRMSG("Can't exclude pages filled with zero for creating an ELF dumpfile.\n"); + return FALSE; + } + } + + if (!sync_2nd_bitmap()) + return FALSE; + + /* --exclude-unused-vm means exclude vmemmap page structures for unused pages */ + if (info->flag_excludevm) { + if (init_save_control() == FAILED) + return FALSE; + if (find_unused_vmemmap_pages() == FAILED) + return FALSE; + if (reset_save_control() == FAILED) + return FALSE; + delete_unused_vmemmap_pages(); + finalize_save_control(); + if (!sync_2nd_bitmap()) + return FALSE; + } + + return TRUE; +} + +int +prepare_bitmap1_buffer(void) +{ + /* + * Prepare bitmap buffers for cyclic processing. + */ + if ((info->bitmap1 = malloc(sizeof(struct dump_bitmap))) == NULL) { + ERRMSG("Can't allocate memory for the 1st bitmaps. %s\n", + strerror(errno)); + return FALSE; + } + + if (info->fd_bitmap >= 0) { + if ((info->bitmap1->buf = (char *)malloc(BUFSIZE_BITMAP)) == NULL) { + ERRMSG("Can't allocate memory for the 1st bitmaps's buffer. %s\n", + strerror(errno)); + return FALSE; + } + } else { + if ((info->bitmap1->buf = (char *)malloc(info->bufsize_cyclic)) == NULL) { + ERRMSG("Can't allocate memory for the 1st bitmaps's buffer. %s\n", + strerror(errno)); + return FALSE; + } + } + initialize_1st_bitmap(info->bitmap1); + + return TRUE; +} + +int +prepare_bitmap2_buffer(void) +{ + unsigned long tmp; + + /* + * Create 2 bitmaps (1st-bitmap & 2nd-bitmap) on block_size + * boundary. The crash utility requires both of them to be + * aligned to block_size boundary. + */ + tmp = divideup(divideup(info->max_mapnr, BITPERBYTE), info->page_size); + info->len_bitmap = tmp * info->page_size * 2; + + /* + * Prepare bitmap buffers for cyclic processing. + */ + if ((info->bitmap2 = malloc(sizeof(struct dump_bitmap))) == NULL) { + ERRMSG("Can't allocate memory for the 2nd bitmaps. 
%s\n", + strerror(errno)); + return FALSE; + } + if (info->fd_bitmap >= 0) { + if ((info->bitmap2->buf = (char *)malloc(BUFSIZE_BITMAP)) == NULL) { + ERRMSG("Can't allocate memory for the 2nd bitmaps's buffer. %s\n", + strerror(errno)); + return FALSE; + } + } else { + if ((info->bitmap2->buf = (char *)malloc(info->bufsize_cyclic)) == NULL) { + ERRMSG("Can't allocate memory for the 2nd bitmaps's buffer. %s\n", + strerror(errno)); + return FALSE; + } + } + initialize_2nd_bitmap(info->bitmap2); + + return TRUE; +} + +int +prepare_bitmap_buffer(void) +{ + /* + * Prepare bitmap buffers for creating dump bitmap. + */ + prepare_bitmap1_buffer(); + prepare_bitmap2_buffer(); + + return TRUE; +} + +void +free_bitmap1_buffer(void) +{ + if (info->bitmap1) { + if (info->bitmap1->buf) { + free(info->bitmap1->buf); + info->bitmap1->buf = NULL; + } + free(info->bitmap1); + info->bitmap1 = NULL; + } +} + +void +free_bitmap2_buffer(void) +{ + if (info->bitmap2) { + if (info->bitmap2->buf) { + free(info->bitmap2->buf); + info->bitmap2->buf = NULL; + } + free(info->bitmap2); + info->bitmap2 = NULL; + } +} + +void +free_bitmap_buffer(void) +{ + free_bitmap1_buffer(); + free_bitmap2_buffer(); +} + +int +prepare_cache_data(struct cache_data *cd) +{ + cd->fd = info->fd_dumpfile; + cd->file_name = info->name_dumpfile; + cd->cache_size = info->page_size << info->block_order; + cd->buf_size = 0; + cd->buf = NULL; + + if ((cd->buf = malloc(cd->cache_size + info->page_size)) == NULL) { + ERRMSG("Can't allocate memory for the data buffer. %s\n", + strerror(errno)); + return FALSE; + } + return TRUE; +} + +void +free_cache_data(struct cache_data *cd) +{ + free(cd->buf); + cd->buf = NULL; +} + +int +write_start_flat_header() +{ + char buf[MAX_SIZE_MDF_HEADER]; + struct makedumpfile_header fh; + + if (!info->flag_flatten) + return FALSE; + + strcpy(fh.signature, MAKEDUMPFILE_SIGNATURE); + + /* + * For sending dump data to a different architecture, change the values + * to big endian. 
	 */
	if (is_bigendian()){
		fh.type    = TYPE_FLAT_HEADER;
		fh.version = VERSION_FLAT_HEADER;
	} else {
		fh.type    = bswap_64(TYPE_FLAT_HEADER);
		fh.version = bswap_64(VERSION_FLAT_HEADER);
	}

	memset(buf, 0, sizeof(buf));
	memcpy(buf, &fh, sizeof(fh));

	if (!write_and_check_space(info->fd_dumpfile, buf, MAX_SIZE_MDF_HEADER,
	    info->name_dumpfile))
		return FALSE;

	return TRUE;
}

/*
 * Write the flat-format end marker (offset and size both set to the
 * END_FLAG sentinel).  Only meaningful in flatten mode.
 */
int
write_end_flat_header(void)
{
	struct makedumpfile_data_header fdh;

	if (!info->flag_flatten)
		return FALSE;

	fdh.offset   = END_FLAG_FLAT_HEADER;
	fdh.buf_size = END_FLAG_FLAT_HEADER;

	if (!write_and_check_space(info->fd_dumpfile, &fdh, sizeof(fdh),
	    info->name_dumpfile))
		return FALSE;

	return TRUE;
}

/*
 * Write one program header through the header cache, down-converting
 * the 64-bit Phdr to 32-bit when the source memory image is ELF32.
 */
int
write_elf_phdr(struct cache_data *cd_hdr, Elf64_Phdr *load)
{
	Elf32_Phdr load32;

	if (is_elf64_memory()) { /* ELF64 */
		if (!write_cache(cd_hdr, load, sizeof(Elf64_Phdr)))
			return FALSE;

	} else {
		memset(&load32, 0, sizeof(Elf32_Phdr));
		load32.p_type   = load->p_type;
		load32.p_flags  = load->p_flags;
		load32.p_offset = load->p_offset;
		load32.p_vaddr  = load->p_vaddr;
		load32.p_paddr  = load->p_paddr;
		load32.p_filesz = load->p_filesz;
		load32.p_memsz  = load->p_memsz;
		load32.p_align  = load->p_align;

		if (!write_cache(cd_hdr, &load32, sizeof(Elf32_Phdr)))
			return FALSE;
	}
	return TRUE;
}

/*
 * Write the ELF file header, reserve space for the program headers,
 * and copy the PT_NOTE segment from the source memory image (growing
 * it to hold eraseinfo if needed).  PT_LOAD headers/segments are
 * written later by write_elf_pages_cyclic().
 */
int
write_elf_header(struct cache_data *cd_header)
{
	int i, num_loads_dumpfile, phnum;
	off_t offset_note_memory, offset_note_dumpfile;
	size_t size_note, size_eraseinfo = 0;
	Elf64_Ehdr ehdr64;
	Elf32_Ehdr ehdr32;
	Elf64_Phdr note;

	char *buf = NULL;
	const off_t failed = (off_t)-1;

	int ret = FALSE;

	if (!info->flag_elf_dumpfile)
		return FALSE;

	/*
	 * Get the PT_LOAD number of the dumpfile.
	 */
	if (!(num_loads_dumpfile = get_loads_dumpfile_cyclic())) {
		ERRMSG("Can't get a number of PT_LOAD.\n");
		goto out;
	}

	if (is_elf64_memory()) { /* ELF64 */
		if (!get_elf64_ehdr(info->fd_memory,
				    info->name_memory, &ehdr64)) {
			ERRMSG("Can't get ehdr64.\n");
			goto out;
		}
		/*
		 * PT_NOTE(1) + PT_LOAD(1+)
		 */
		ehdr64.e_phnum = 1 + num_loads_dumpfile;
	} else { /* ELF32 */
		if (!get_elf32_ehdr(info->fd_memory,
				    info->name_memory, &ehdr32)) {
			ERRMSG("Can't get ehdr32.\n");
			goto out;
		}
		/*
		 * PT_NOTE(1) + PT_LOAD(1+)
		 */
		ehdr32.e_phnum = 1 + num_loads_dumpfile;
	}

	/*
	 * Write an ELF header.
	 */
	if (is_elf64_memory()) { /* ELF64 */
		if (!write_buffer(info->fd_dumpfile, 0, &ehdr64, sizeof(ehdr64),
		    info->name_dumpfile))
			goto out;

	} else { /* ELF32 */
		if (!write_buffer(info->fd_dumpfile, 0, &ehdr32, sizeof(ehdr32),
		    info->name_dumpfile))
			goto out;
	}

	/*
	 * Pre-calculate the required size to store eraseinfo in ELF note
	 * section so that we can add enough space in ELF notes section and
	 * adjust the PT_LOAD offset accordingly.
	 */
	size_eraseinfo = get_size_eraseinfo();

	/*
	 * Store the size_eraseinfo for later use in write_elf_eraseinfo()
	 * function.
	 */
	info->size_elf_eraseinfo = size_eraseinfo;

	/*
	 * Write a PT_NOTE header.
	 */
	if (!(phnum = get_phnum_memory()))
		goto out;

	for (i = 0; i < phnum; i++) {
		/* NOTE(review): 'return FALSE' here bypasses the goto-out
		 * cleanup path; harmless at this point (buf is still NULL)
		 * but inconsistent with the rest of the function. */
		if (!get_phdr_memory(i, &note))
			return FALSE;
		if (note.p_type == PT_NOTE)
			break;
	}
	if (note.p_type != PT_NOTE) {
		ERRMSG("Can't get a PT_NOTE header.\n");
		goto out;
	}

	if (is_elf64_memory()) { /* ELF64 */
		cd_header->offset = sizeof(ehdr64);
		offset_note_dumpfile = sizeof(ehdr64)
		    + sizeof(Elf64_Phdr) * ehdr64.e_phnum;
	} else {
		cd_header->offset = sizeof(ehdr32);
		offset_note_dumpfile = sizeof(ehdr32)
		    + sizeof(Elf32_Phdr) * ehdr32.e_phnum;
	}
	offset_note_memory = note.p_offset;
	note.p_offset      = offset_note_dumpfile;
	size_note          = note.p_filesz;

	/*
	 * Reserve a space to store the whole program headers.
	 */
	if (!reserve_diskspace(cd_header->fd, cd_header->offset,
	    offset_note_dumpfile, cd_header->file_name))
		goto out;

	/*
	 * Modify the note size in PT_NOTE header to accomodate eraseinfo data.
	 * Eraseinfo will be written later.
	 */
	if (info->size_elf_eraseinfo) {
		if (is_elf64_memory())
			note.p_filesz += sizeof(Elf64_Nhdr);
		else
			note.p_filesz += sizeof(Elf32_Nhdr);
		note.p_filesz += roundup(ERASEINFO_NOTE_NAME_BYTES, 4)
		    + roundup(size_eraseinfo, 4);
	}

	if (!write_elf_phdr(cd_header, &note))
		goto out;

	/*
	 * Write a PT_NOTE segment.
	 * PT_LOAD header will be written later.
	 */
	if ((buf = malloc(size_note)) == NULL) {
		ERRMSG("Can't allocate memory for PT_NOTE segment. %s\n",
		    strerror(errno));
		goto out;
	}
	if (lseek(info->fd_memory, offset_note_memory, SEEK_SET) == failed) {
		ERRMSG("Can't seek the dump memory(%s). %s\n",
		    info->name_memory, strerror(errno));
		goto out;
	}
	if (read(info->fd_memory, buf, size_note) != size_note) {
		ERRMSG("Can't read the dump memory(%s). %s\n",
		    info->name_memory, strerror(errno));
		goto out;
	}
	if (!write_buffer(info->fd_dumpfile, offset_note_dumpfile, buf,
	    size_note, info->name_dumpfile))
		goto out;

	/* Set the size_note with new size.
*/ + size_note = note.p_filesz; + + /* + * Set an offset of PT_LOAD segment. + */ + info->offset_load_dumpfile = offset_note_dumpfile + size_note; + info->offset_note_dumpfile = offset_note_dumpfile; + + ret = TRUE; +out: + if (buf != NULL) + free(buf); + + return ret; +} + +int +write_kdump_header(void) +{ + int ret = FALSE; + size_t size; + off_t offset_note, offset_vmcoreinfo; + unsigned long size_note, size_vmcoreinfo; + struct disk_dump_header *dh = info->dump_header; + struct kdump_sub_header kh; + char *buf = NULL; + + if (info->flag_elf_dumpfile) + return FALSE; + + get_pt_note(&offset_note, &size_note); + + /* + * Write common header + */ + strncpy(dh->signature, KDUMP_SIGNATURE, strlen(KDUMP_SIGNATURE)); + dh->header_version = 6; + dh->block_size = info->page_size; + dh->sub_hdr_size = sizeof(kh) + size_note; + dh->sub_hdr_size = divideup(dh->sub_hdr_size, dh->block_size); + /* dh->max_mapnr may be truncated, full 64bit in kh.max_mapnr_64 */ + dh->max_mapnr = MIN(info->max_mapnr, UINT_MAX); + dh->nr_cpus = get_nr_cpus(); + dh->bitmap_blocks = divideup(info->len_bitmap, dh->block_size); + memcpy(&dh->timestamp, &info->timestamp, sizeof(dh->timestamp)); + memcpy(&dh->utsname, &info->system_utsname, sizeof(dh->utsname)); + + if (info->flag_excludevm) + dh->status |= DUMP_DH_EXCLUDED_VMEMMAP; + + if (info->flag_compress & DUMP_DH_COMPRESSED_ZLIB) + dh->status |= DUMP_DH_COMPRESSED_ZLIB; +#ifdef USELZO + else if (info->flag_compress & DUMP_DH_COMPRESSED_LZO) + dh->status |= DUMP_DH_COMPRESSED_LZO; +#endif +#ifdef USESNAPPY + else if (info->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) + dh->status |= DUMP_DH_COMPRESSED_SNAPPY; +#endif + + size = sizeof(struct disk_dump_header); + if (!write_buffer(info->fd_dumpfile, 0, dh, size, info->name_dumpfile)) + return FALSE; + + /* + * Write sub header + */ + size = sizeof(struct kdump_sub_header); + memset(&kh, 0, size); + /* 64bit max_mapnr_64 */ + kh.max_mapnr_64 = info->max_mapnr; + kh.phys_base = info->phys_base; + 
kh.dump_level = info->dump_level; + if (info->flag_split) { + kh.split = 1; + /* + * start_pfn and end_pfn may be truncated, + * only for compatibility purpose + */ + kh.start_pfn = MIN(info->split_start_pfn, UINT_MAX); + kh.end_pfn = MIN(info->split_end_pfn, UINT_MAX); + + /* 64bit start_pfn_64 and end_pfn_64 */ + kh.start_pfn_64 = info->split_start_pfn; + kh.end_pfn_64 = info->split_end_pfn; + } + if (has_pt_note()) { + /* + * Write ELF note section + */ + kh.offset_note + = DISKDUMP_HEADER_BLOCKS * dh->block_size + sizeof(kh); + kh.size_note = size_note; + + buf = malloc(size_note); + if (buf == NULL) { + ERRMSG("Can't allocate memory for ELF note section. %s\n", + strerror(errno)); + return FALSE; + } + + if (!info->flag_sadump) { + if (lseek(info->fd_memory, offset_note, SEEK_SET) < 0) { + ERRMSG("Can't seek the dump memory(%s). %s\n", + info->name_memory, strerror(errno)); + goto out; + } + if (read(info->fd_memory, buf, size_note) != size_note) { + ERRMSG("Can't read the dump memory(%s). %s\n", + info->name_memory, strerror(errno)); + goto out; + } + } else { + if (!sadump_read_elf_note(buf, size_note)) + goto out; + } + + if (!write_buffer(info->fd_dumpfile, kh.offset_note, buf, + kh.size_note, info->name_dumpfile)) + goto out; + + if (has_vmcoreinfo()) { + get_vmcoreinfo(&offset_vmcoreinfo, &size_vmcoreinfo); + /* + * Set vmcoreinfo data + * + * NOTE: ELF note section contains vmcoreinfo data, and + * kh.offset_vmcoreinfo points the vmcoreinfo data. 
+ */ + kh.offset_vmcoreinfo + = offset_vmcoreinfo - offset_note + + kh.offset_note; + kh.size_vmcoreinfo = size_vmcoreinfo; + } + } + if (!write_buffer(info->fd_dumpfile, dh->block_size, &kh, + size, info->name_dumpfile)) + goto out; + + info->sub_header = kh; + info->offset_bitmap1 + = (DISKDUMP_HEADER_BLOCKS + dh->sub_hdr_size) * dh->block_size; + + ret = TRUE; +out: + if (buf) + free(buf); + + return ret; +} + +/* + * cyclic_split mode: + * manage memory by splitblocks, + * divide memory into splitblocks + * use splitblock_table to record numbers of dumpable pages in each + * splitblock + */ + +/* + * calculate entry size based on the amount of pages in one splitblock + */ +int +calculate_entry_size(void) +{ + int entry_num = 1; + int count = 1; + int entry_size; + + while (entry_num < splitblock->page_per_splitblock) { + entry_num = entry_num << 1; + count++; + } + + entry_size = count / BITPERBYTE; + if (count % BITPERBYTE) + entry_size++; + + return entry_size; +} + +void +write_into_splitblock_table(char *entry, + unsigned long long value) +{ + char temp; + int i = 0; + + while (i++ < splitblock->entry_size) { + temp = value & 0xff; + value = value >> BITPERBYTE; + *entry = temp; + entry++; + } +} + +unsigned long long +read_from_splitblock_table(char *entry) +{ + unsigned long long value = 0; + int i; + + for (i = splitblock->entry_size; i > 0; i--) { + value = value << BITPERBYTE; + value += *(entry + i - 1) & 0xff; + } + + return value; +} + +/* + * The splitblock size is specified as Kbyte with --splitblock-size <size> option. + * If not specified, set default value. + */ +int +check_splitblock_size(void) +{ + if (info->splitblock_size) { + info->splitblock_size <<= 10; + if (info->splitblock_size == 0) { + ERRMSG("The splitblock size could not be 0. %s.\n", + strerror(errno)); + return FALSE; + } + if (info->splitblock_size % info->page_size != 0) { + ERRMSG("The splitblock size must be align to page_size. 
%s.\n", + strerror(errno)); + return FALSE; + } + } else { + info->splitblock_size = DEFAULT_SPLITBLOCK_SIZE; + } + + return TRUE; +} + +int +prepare_splitblock_table(void) +{ + size_t table_size; + + if (!check_splitblock_size()) + return FALSE; + + if ((splitblock = calloc(1, sizeof(struct SplitBlock))) == NULL) { + ERRMSG("Can't allocate memory for the splitblock. %s.\n", + strerror(errno)); + return FALSE; + } + + splitblock->page_per_splitblock = info->splitblock_size / info->page_size; + splitblock->num = divideup(info->max_mapnr, splitblock->page_per_splitblock); + splitblock->entry_size = calculate_entry_size(); + table_size = splitblock->entry_size * splitblock->num; + + splitblock->table = (char *)calloc(sizeof(char), table_size); + if (!splitblock->table) { + ERRMSG("Can't allocate memory for the splitblock_table. %s.\n", + strerror(errno)); + return FALSE; + } + + return TRUE; +} + +mdf_pfn_t +get_num_dumpable(void) +{ + mdf_pfn_t pfn, num_dumpable; + + initialize_2nd_bitmap(info->bitmap2); + + for (pfn = 0, num_dumpable = 0; pfn < info->max_mapnr; pfn++) { + if (is_dumpable(info->bitmap2, pfn, NULL)) + num_dumpable++; + } + return num_dumpable; +} + +/* + * generate splitblock_table + * modified from function get_num_dumpable_cyclic + */ +mdf_pfn_t +get_num_dumpable_cyclic_withsplit(void) +{ + mdf_pfn_t pfn, num_dumpable = 0; + mdf_pfn_t dumpable_pfn_num = 0, pfn_num = 0; + struct cycle cycle = {0}; + int pos = 0; + + for_each_cycle(0, info->max_mapnr, &cycle) { + if (info->flag_cyclic) { + if (!create_2nd_bitmap(&cycle)) + return FALSE; + } + + for (pfn = cycle.start_pfn; pfn < cycle.end_pfn; pfn++) { + if (is_dumpable(info->bitmap2, pfn, &cycle)) { + num_dumpable++; + dumpable_pfn_num++; + } + if (++pfn_num >= splitblock->page_per_splitblock) { + write_into_splitblock_table(splitblock->table + pos, + dumpable_pfn_num); + pos += splitblock->entry_size; + pfn_num = 0; + dumpable_pfn_num = 0; + } + } + } + + return num_dumpable; +} + +mdf_pfn_t 
get_num_dumpable_cyclic_single(void)
{
	mdf_pfn_t pfn, num_dumpable=0;
	struct cycle cycle = {0};

	for_each_cycle(0, info->max_mapnr, &cycle)
	{
		if (info->flag_cyclic) {
			if (!create_2nd_bitmap(&cycle))
				return FALSE;
		}

		for(pfn=cycle.start_pfn; pfn<cycle.end_pfn; pfn++)
			if (is_dumpable(info->bitmap2, pfn, &cycle))
				num_dumpable++;
	}

	return num_dumpable;
}

/*
 * Count dumpable pages, also filling the splitblock table when the
 * dump is being split.
 */
mdf_pfn_t
get_num_dumpable_cyclic(void)
{
	if (info->flag_split)
		return get_num_dumpable_cyclic_withsplit();
	else
		return get_num_dumpable_cyclic_single();
}

/*
 * Build the dump bitmap(s) and compute info->num_dumpable.
 * In cyclic mode only the 2nd-bitmap buffer is prepared here; otherwise
 * both bitmaps are created for the single full cycle.
 */
int
create_dump_bitmap(void)
{
	int ret = FALSE;

	if (info->flag_split) {
		if (!prepare_splitblock_table())
			goto out;
	}

	if (info->flag_cyclic) {
		if (!prepare_bitmap2_buffer())
			goto out;

		info->num_dumpable = get_num_dumpable_cyclic();

		if (!info->flag_elf_dumpfile)
			free_bitmap2_buffer();

	} else {
		struct cycle cycle = {0};
		first_cycle(0, info->max_mapnr, &cycle);
		if (!prepare_bitmap_buffer())
			goto out;

		pfn_memhole = info->max_mapnr;
		if (!create_1st_bitmap(&cycle))
			goto out;

		if (!create_2nd_bitmap(&cycle))
			goto out;

		info->num_dumpable = get_num_dumpable_cyclic();
	}

	ret = TRUE;
out:
	if (ret == FALSE)
		free_bitmap_buffer();

	return ret;
}

/*
 * Copy 'size' bytes of a PT_LOAD segment from the dump memory to the
 * page cache, filtering (erasing) data on the way.
 */
int
write_elf_load_segment(struct cache_data *cd_page, unsigned long long paddr,
		       off_t off_memory, long long size)
{
	long page_size = info->page_size;
	long long bufsz_write;
	char buf[info->page_size];

	off_memory = paddr_to_offset2(paddr, off_memory);
	if (!off_memory) {
		ERRMSG("Can't convert physaddr(%llx) to an offset.\n",
		    paddr);
		return FALSE;
	}
	if (lseek(info->fd_memory, off_memory, SEEK_SET) < 0) {
		ERRMSG("Can't seek the dump memory(%s). %s\n",
		    info->name_memory, strerror(errno));
		return FALSE;
	}

	while (size > 0) {
		/* Last chunk may be shorter than one page. */
		if (size >= page_size)
			bufsz_write = page_size;
		else
			bufsz_write = size;

		if (read(info->fd_memory, buf, bufsz_write) != bufsz_write) {
			ERRMSG("Can't read the dump memory(%s). %s\n",
			    info->name_memory, strerror(errno));
			return FALSE;
		}
		filter_data_buffer((unsigned char *)buf, paddr, bufsz_write);
		paddr += bufsz_write;
		if (!write_cache(cd_page, buf, bufsz_write))
			return FALSE;

		size -= page_size;
	}
	return TRUE;
}

/* Read one page by pfn from the dump memory into buf. */
int
read_pfn(mdf_pfn_t pfn, unsigned char *buf)
{
	unsigned long long paddr;

	paddr = pfn_to_paddr(pfn);
	if (!readmem(PADDR, paddr, buf, info->page_size)) {
		ERRMSG("Can't get the page data.\n");
		return FALSE;
	}

	return TRUE;
}

/*
 * Per-thread variant of read_pfn(): reads through the caller's own fd,
 * bitmap and mmap cache so parallel workers do not share state.
 */
int
read_pfn_parallel(int fd_memory, mdf_pfn_t pfn, unsigned char *buf,
		  struct dump_bitmap* bitmap_memory_parallel,
		  struct mmap_cache *mmap_cache)
{
	unsigned long long paddr;
	unsigned long long pgaddr;

	paddr = pfn_to_paddr(pfn);

	pgaddr = PAGEBASE(paddr);

	if (info->flag_refiltering) {
		if (!readpage_kdump_compressed_parallel(fd_memory, pgaddr, buf,
							bitmap_memory_parallel)) {
			ERRMSG("Can't get the page data.\n");
			return FALSE;
		}
	} else {
		/* Prefer the mmap fast path; fall back to read(). */
		char *mapbuf = mappage_elf_parallel(fd_memory, pgaddr,
						    mmap_cache);
		if (mapbuf) {
			memcpy(buf, mapbuf, info->page_size);
		} else {
			if (!readpage_elf_parallel(fd_memory, pgaddr, buf)) {
				ERRMSG("Can't get the page data.\n");
				return FALSE;
			}
		}
	}

	return TRUE;
}

/*
 * Dry-run counterpart of write_elf_pages_cyclic(): count how many
 * PT_LOAD headers the output ELF dump will need, splitting a segment
 * wherever PFN_EXCLUDED or more contiguous pages get excluded.
 */
int
get_loads_dumpfile_cyclic(void)
{
	int i, phnum, num_new_load = 0;
	long page_size = info->page_size;
	mdf_pfn_t pfn, pfn_start, pfn_end, num_excluded;
	unsigned long frac_head, frac_tail;
	Elf64_Phdr load;
	struct cycle cycle = {0};

	if (!(phnum = get_phnum_memory()))
		return FALSE;

	for (i = 0; i < phnum; i++) {
		if (!get_phdr_memory(i, &load))
			return FALSE;
		if (load.p_type != PT_LOAD)
			continue;

		pfn_start = paddr_to_pfn(load.p_paddr);
		pfn_end = paddr_to_pfn(load.p_paddr + load.p_memsz);
		frac_head = page_size - (load.p_paddr % page_size);
		frac_tail = (load.p_paddr + load.p_memsz) % page_size;

		num_new_load++;
		num_excluded = 0;

		/* Unaligned head/tail pages are handled outside the loop. */
		if (frac_head && (frac_head != page_size))
			pfn_start++;
		if (frac_tail)
			pfn_end++;

		for_each_cycle(pfn_start, pfn_end, &cycle) {
			if (info->flag_cyclic) {
				if (!create_2nd_bitmap(&cycle))
					return FALSE;
			}
			for (pfn = MAX(pfn_start, cycle.start_pfn); pfn < cycle.end_pfn; pfn++) {
				if (!is_dumpable(info->bitmap2, pfn, &cycle)) {
					num_excluded++;
					continue;
				}

				/*
				 * If the number of the contiguous pages to be excluded
				 * is 256 or more, those pages are excluded really.
				 * And a new PT_LOAD segment is created.
				 */
				if (num_excluded >= PFN_EXCLUDED) {
					num_new_load++;
				}
				num_excluded = 0;
			}
		}

	}
	return num_new_load;
}

/*
 * Write all PT_LOAD headers and segments of the ELF dump, splitting
 * segments around large excluded runs, updating progress counters.
 */
int
write_elf_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_page)
{
	int i, phnum;
	long page_size = info->page_size;
	mdf_pfn_t pfn, pfn_start, pfn_end, num_excluded, num_dumpable, per;
	unsigned long long paddr;
	unsigned long long memsz, filesz;
	unsigned long frac_head, frac_tail;
	off_t off_seg_load, off_memory;
	Elf64_Phdr load;
	struct timeval tv_start;
	struct cycle cycle = {0};

	if (!info->flag_elf_dumpfile)
		return FALSE;

	num_dumpable = info->num_dumpable;
	per = num_dumpable / 10000;
	per = per ? per : 1;

	off_seg_load    = info->offset_load_dumpfile;
	cd_page->offset = info->offset_load_dumpfile;

	/*
	 * Reset counter for debug message.
	 */
	if (info->flag_cyclic) {
		pfn_zero = pfn_cache = pfn_cache_private = 0;
		pfn_user = pfn_free = pfn_hwpoison = 0;
		pfn_memhole = info->max_mapnr;
	}

	if (!(phnum = get_phnum_memory()))
		return FALSE;

	gettimeofday(&tv_start, NULL);

	for (i = 0; i < phnum; i++) {
		if (!get_phdr_memory(i, &load))
			return FALSE;

		if (load.p_type != PT_LOAD)
			continue;

		off_memory= load.p_offset;
		paddr = load.p_paddr;
		pfn_start = paddr_to_pfn(load.p_paddr);
		pfn_end = paddr_to_pfn(load.p_paddr + load.p_memsz);
		frac_head = page_size - (load.p_paddr % page_size);
		frac_tail = (load.p_paddr + load.p_memsz)%page_size;

		num_excluded = 0;
		memsz = 0;
		filesz = 0;
		/* Unaligned head: account for the partial first page. */
		if (frac_head && (frac_head != page_size)) {
			memsz = frac_head;
			filesz = frac_head;
			pfn_start++;
		}

		if (frac_tail)
			pfn_end++;

		for_each_cycle(pfn_start, pfn_end, &cycle) {
			/*
			 * Update target region and partial bitmap if necessary.
			 */
			if (info->flag_cyclic) {
				if (!create_2nd_bitmap(&cycle))
					return FALSE;
			}

			for (pfn = MAX(pfn_start, cycle.start_pfn); pfn < cycle.end_pfn; pfn++) {
				if (!is_dumpable(info->bitmap2, pfn, &cycle)) {
					num_excluded++;
					if ((pfn == pfn_end - 1) && frac_tail)
						memsz += frac_tail;
					else
						memsz += page_size;
					continue;
				}

				if ((num_dumped % per) == 0)
					print_progress(PROGRESS_COPY, num_dumped, num_dumpable, &tv_start);

				num_dumped++;

				/*
				 * The dumpable pages are continuous.
				 */
				if (!num_excluded) {
					if ((pfn == pfn_end - 1) && frac_tail) {
						memsz  += frac_tail;
						filesz += frac_tail;
					} else {
						memsz  += page_size;
						filesz += page_size;
					}
					continue;
				/*
				 * If the number of the contiguous pages to be excluded
				 * is 255 or less, those pages are not excluded.
				 */
				} else if (num_excluded < PFN_EXCLUDED) {
					if ((pfn == pfn_end - 1) && frac_tail) {
						memsz  += frac_tail;
						filesz += (page_size*num_excluded
						    + frac_tail);
					}else {
						memsz  += page_size;
						filesz += (page_size*num_excluded
						    + page_size);
					}
					num_excluded = 0;
					continue;
				}

				/*
				 * If the number of the contiguous pages to be excluded
				 * is 256 or more, those pages are excluded really.
				 * And a new PT_LOAD segment is created.
				 */
				load.p_memsz = memsz;
				load.p_filesz = filesz;
				if (load.p_filesz)
					load.p_offset = off_seg_load;
				else
					/*
					 * If PT_LOAD segment does not have real data
					 * due to the all excluded pages, the file
					 * offset is not effective and it should be 0.
					 */
					load.p_offset = 0;

				/*
				 * Write a PT_LOAD header.
				 */
				if (!write_elf_phdr(cd_header, &load))
					return FALSE;

				/*
				 * Write a PT_LOAD segment.
				 */
				if (load.p_filesz)
					if (!write_elf_load_segment(cd_page, paddr,
								    off_memory, load.p_filesz))
						return FALSE;

				load.p_paddr += load.p_memsz;
#ifdef __x86__
				/*
				 * FIXME:
				 *  (x86) Fill PT_LOAD headers with appropriate
				 *        virtual addresses.
				 */
				if (load.p_paddr < MAXMEM)
					load.p_vaddr += load.p_memsz;
#else
				load.p_vaddr += load.p_memsz;
#endif /* x86 */
				paddr = load.p_paddr;
				off_seg_load += load.p_filesz;

				/* Start the next segment with the current page. */
				num_excluded = 0;
				memsz  = page_size;
				filesz = page_size;
			}
		}

		/*
		 * Write the last PT_LOAD.
		 */
		load.p_memsz = memsz;
		load.p_filesz = filesz;
		load.p_offset = off_seg_load;

		/*
		 * Write a PT_LOAD header.
		 */
		if (!write_elf_phdr(cd_header, &load))
			return FALSE;

		/*
		 * Write a PT_LOAD segment.
+ */ + if (load.p_filesz) + if (!write_elf_load_segment(cd_page, paddr, + off_memory, load.p_filesz)) + return FALSE; + + off_seg_load += load.p_filesz; + } + if (!write_cache_bufsz(cd_header)) + return FALSE; + if (!write_cache_bufsz(cd_page)) + return FALSE; + + free_bitmap2_buffer(); + + /* + * print [100 %] + */ + print_progress(PROGRESS_COPY, num_dumpable, num_dumpable, &tv_start); + print_execution_time(PROGRESS_COPY, &tv_start); + PROGRESS_MSG("\n"); + + return TRUE; +} + +int +write_cd_buf(struct cache_data *cd) +{ + if (cd->buf_size == 0) + return TRUE; + + if (!write_buffer(cd->fd, cd->offset, cd->buf, + cd->buf_size, cd->file_name)) { + return FALSE; + } + + return TRUE; +} + +/* + * get_nr_pages is used for generating incomplete kdump core. + * When enospac occurs in writing the buf cd_page, it can be used to + * get how many pages have been written. + */ +int +get_nr_pages(void *buf, struct cache_data *cd_page){ + int size, file_end, nr_pages; + page_desc_t *pd = buf; + + file_end = lseek(cd_page->fd, 0, SEEK_END); + if (file_end < 0) { + ERRMSG("Can't seek end of the dump file(%s).\n", cd_page->file_name); + return -1; + } + + size = pd->size; + nr_pages = 0; + while (size <= file_end - cd_page->offset) { + nr_pages++; + pd++; + size += pd->size; + } + + return nr_pages; +} + +int +write_kdump_page(struct cache_data *cd_header, struct cache_data *cd_page, + struct page_desc *pd, void *page_data) +{ + int written_headers_size; + + /* + * If either cd_header or cd_page is nearly full, + * write the buffer cd_header into dumpfile and then write the cd_page. + * With that, when enospc occurs, we can save more useful information. 
+ */ + if (cd_header->buf_size + sizeof(*pd) > cd_header->cache_size + || cd_page->buf_size + pd->size > cd_page->cache_size){ + if( !write_cd_buf(cd_header) ) { + memset(cd_header->buf, 0, cd_header->cache_size); + write_cd_buf(cd_header); + + return FALSE; + } + + if( !write_cd_buf(cd_page) ) { + written_headers_size = sizeof(page_desc_t) * + get_nr_pages(cd_header->buf, cd_page); + if (written_headers_size < 0) + return FALSE; + + memset(cd_header->buf, 0, cd_header->cache_size); + cd_header->offset += written_headers_size; + cd_header->buf_size -= written_headers_size; + write_cd_buf(cd_header); + + return FALSE; + } + cd_header->offset += cd_header->buf_size; + cd_page->offset += cd_page->buf_size; + cd_header->buf_size = 0; + cd_page->buf_size = 0; + } + + write_cache(cd_header, pd, sizeof(page_desc_t)); + write_cache(cd_page, page_data, pd->size); + + return TRUE; +} + +int initialize_zlib(z_stream *stream, int level) +{ + int err; + + stream->zalloc = (alloc_func)Z_NULL; + stream->zfree = (free_func)Z_NULL; + stream->opaque = (voidpf)Z_NULL; + + err = deflateInit(stream, level); + if (err != Z_OK) { + ERRMSG("deflateInit failed: %s\n", zError(err)); + return FALSE; + } + return TRUE; +} + +int compress_mdf (z_stream *stream, Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen, int level) +{ + int err; + stream->next_in = (Bytef*)source; + stream->avail_in = (uInt)sourceLen; + stream->next_out = dest; + stream->avail_out = (uInt)*destLen; + if ((uLong)stream->avail_out != *destLen) + return Z_BUF_ERROR; + + err = deflate(stream, Z_FINISH); + + if (err != Z_STREAM_END) { + deflateReset(stream); + return err == Z_OK ? 
Z_BUF_ERROR : err; + } + *destLen = stream->total_out; + + err = deflateReset(stream); + return err; +} + +int finalize_zlib(z_stream *stream) +{ + int err; + err = deflateEnd(stream); + + return err; +} + +static void +cleanup_mutex(void *mutex) { + pthread_mutex_unlock(mutex); +} + +void * +kdump_thread_function_cyclic(void *arg) { + void *retval = PTHREAD_FAIL; + struct thread_args *kdump_thread_args = (struct thread_args *)arg; + volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; + volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; + struct cycle *cycle = kdump_thread_args->cycle; + mdf_pfn_t pfn = cycle->start_pfn; + int index = kdump_thread_args->thread_num; + int buf_ready; + int dumpable; + int fd_memory = 0; + struct dump_bitmap bitmap_parallel = {0}; + struct dump_bitmap bitmap_memory_parallel = {0}; + unsigned char *buf = NULL, *buf_out = NULL; + struct mmap_cache *mmap_cache = + MMAP_CACHE_PARALLEL(kdump_thread_args->thread_num); + unsigned long size_out; + z_stream *stream = &ZLIB_STREAM_PARALLEL(kdump_thread_args->thread_num); +#ifdef USELZO + lzo_bytep wrkmem = WRKMEM_PARALLEL(kdump_thread_args->thread_num); +#endif +#ifdef USESNAPPY + unsigned long len_buf_out_snappy = + snappy_max_compressed_length(info->page_size); +#endif + + buf = BUF_PARALLEL(kdump_thread_args->thread_num); + buf_out = BUF_OUT_PARALLEL(kdump_thread_args->thread_num); + + fd_memory = FD_MEMORY_PARALLEL(kdump_thread_args->thread_num); + + if (info->fd_bitmap >= 0) { + bitmap_parallel.buf = malloc(BUFSIZE_BITMAP); + if (bitmap_parallel.buf == NULL){ + ERRMSG("Can't allocate memory for bitmap_parallel.buf. %s\n", + strerror(errno)); + goto fail; + } + initialize_2nd_bitmap_parallel(&bitmap_parallel, + kdump_thread_args->thread_num); + } + + if (info->flag_refiltering) { + bitmap_memory_parallel.buf = malloc(BUFSIZE_BITMAP); + if (bitmap_memory_parallel.buf == NULL){ + ERRMSG("Can't allocate memory for bitmap_memory_parallel.buf. 
%s\n", + strerror(errno)); + goto fail; + } + initialize_bitmap_memory_parallel(&bitmap_memory_parallel, + kdump_thread_args->thread_num); + } + + /* + * filtered page won't take anything + * unfiltered zero page will only take a page_flag_buf + * unfiltered non-zero page will take a page_flag_buf and a page_data_buf + */ + while (pfn < cycle->end_pfn) { + buf_ready = FALSE; + + pthread_mutex_lock(&info->page_data_mutex); + pthread_cleanup_push(cleanup_mutex, &info->page_data_mutex); + while (page_data_buf[index].used != FALSE) { + pthread_testcancel(); + index = (index + 1) % info->num_buffers; + } + page_data_buf[index].used = TRUE; + pthread_cleanup_pop(1); + + while (buf_ready == FALSE) { + pthread_testcancel(); + if (page_flag_buf->ready == FLAG_READY) + continue; + + /* get next dumpable pfn */ + pthread_mutex_lock(&info->current_pfn_mutex); + for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { + dumpable = is_dumpable( + info->fd_bitmap >= 0 ? &bitmap_parallel : info->bitmap2, + pfn, + cycle); + if (dumpable) + break; + } + info->current_pfn = pfn + 1; + + page_flag_buf->pfn = pfn; + page_flag_buf->ready = FLAG_FILLING; + pthread_mutex_unlock(&info->current_pfn_mutex); + sem_post(&info->page_flag_buf_sem); + + if (pfn >= cycle->end_pfn) { + info->current_pfn = cycle->end_pfn; + page_data_buf[index].used = FALSE; + break; + } + + if (!read_pfn_parallel(fd_memory, pfn, buf, + &bitmap_memory_parallel, + mmap_cache)) + goto fail; + + filter_data_buffer_parallel(buf, pfn_to_paddr(pfn), + info->page_size, + &info->filter_mutex); + + if ((info->dump_level & DL_EXCLUDE_ZERO) + && is_zero_page(buf, info->page_size)) { + page_flag_buf->zero = TRUE; + goto next; + } + + page_flag_buf->zero = FALSE; + + /* + * Compress the page data. 
+ */ + size_out = kdump_thread_args->len_buf_out; + if ((info->flag_compress & DUMP_DH_COMPRESSED_ZLIB) + && ((size_out = kdump_thread_args->len_buf_out), + compress_mdf(stream, buf_out, &size_out, buf, + info->page_size, + Z_BEST_SPEED) == Z_OK) + && (size_out < info->page_size)) { + page_data_buf[index].flags = + DUMP_DH_COMPRESSED_ZLIB; + page_data_buf[index].size = size_out; + memcpy(page_data_buf[index].buf, buf_out, size_out); +#ifdef USELZO + } else if (info->flag_lzo_support + && (info->flag_compress + & DUMP_DH_COMPRESSED_LZO) + && ((size_out = info->page_size), + lzo1x_1_compress(buf, info->page_size, + buf_out, &size_out, + wrkmem) == LZO_E_OK) + && (size_out < info->page_size)) { + page_data_buf[index].flags = + DUMP_DH_COMPRESSED_LZO; + page_data_buf[index].size = size_out; + memcpy(page_data_buf[index].buf, buf_out, size_out); +#endif +#ifdef USESNAPPY + } else if ((info->flag_compress + & DUMP_DH_COMPRESSED_SNAPPY) + && ((size_out = len_buf_out_snappy), + snappy_compress((char *)buf, + info->page_size, + (char *)buf_out, + (size_t *)&size_out) + == SNAPPY_OK) + && (size_out < info->page_size)) { + page_data_buf[index].flags = + DUMP_DH_COMPRESSED_SNAPPY; + page_data_buf[index].size = size_out; + memcpy(page_data_buf[index].buf, buf_out, size_out); +#endif + } else { + page_data_buf[index].flags = 0; + page_data_buf[index].size = info->page_size; + memcpy(page_data_buf[index].buf, buf, info->page_size); + } + page_flag_buf->index = index; + buf_ready = TRUE; +next: + page_flag_buf->ready = FLAG_READY; + page_flag_buf = page_flag_buf->next; + + } + } + retval = NULL; + +fail: + if (bitmap_memory_parallel.fd >= 0) + close(bitmap_memory_parallel.fd); + if (bitmap_parallel.buf != NULL) + free(bitmap_parallel.buf); + if (bitmap_memory_parallel.buf != NULL) + free(bitmap_memory_parallel.buf); + + pthread_exit(retval); +} + +int +write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, + struct cache_data *cd_page, + struct page_desc *pd_zero, + off_t 
*offset_data, struct cycle *cycle) +{ + int ret = FALSE; + int res; + unsigned long len_buf_out; + mdf_pfn_t per; + mdf_pfn_t start_pfn, end_pfn; + struct page_desc pd; + struct timeval tv_start; + struct timeval last, new; + pthread_t **threads = NULL; + struct thread_args *kdump_thread_args = NULL; + void *thread_result; + int page_buf_num; + struct page_data *page_data_buf = NULL; + int i; + int index; + int end_count, consuming; + mdf_pfn_t current_pfn, temp_pfn; + + if (info->flag_elf_dumpfile) + return FALSE; + + res = pthread_mutex_init(&info->current_pfn_mutex, NULL); + if (res != 0) { + ERRMSG("Can't initialize current_pfn_mutex. %s\n", + strerror(res)); + goto out; + } + + res = pthread_mutex_init(&info->filter_mutex, NULL); + if (res != 0) { + ERRMSG("Can't initialize filter_mutex. %s\n", strerror(res)); + goto out; + } + + res = pthread_rwlock_init(&info->usemmap_rwlock, NULL); + if (res != 0) { + ERRMSG("Can't initialize usemmap_rwlock. %s\n", strerror(res)); + goto out; + } + + len_buf_out = calculate_len_buf_out(info->page_size); + + per = info->num_dumpable / 10000; + per = per ? 
per : 1; + + gettimeofday(&tv_start, NULL); + + start_pfn = cycle->start_pfn; + end_pfn = cycle->end_pfn; + + info->current_pfn = start_pfn; + + threads = info->threads; + kdump_thread_args = info->kdump_thread_args; + + page_buf_num = info->num_buffers; + page_data_buf = info->page_data_buf; + pthread_mutex_init(&info->page_data_mutex, NULL); + sem_init(&info->page_flag_buf_sem, 0, 0); + + for (i = 0; i < page_buf_num; i++) + page_data_buf[i].used = FALSE; + + for (i = 0; i < info->num_threads; i++) { + kdump_thread_args[i].thread_num = i; + kdump_thread_args[i].len_buf_out = len_buf_out; + kdump_thread_args[i].page_data_buf = page_data_buf; + kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; + kdump_thread_args[i].cycle = cycle; + + res = pthread_create(threads[i], NULL, + kdump_thread_function_cyclic, + (void *)&kdump_thread_args[i]); + if (res != 0) { + ERRMSG("Can't create thread %d. %s\n", + i, strerror(res)); + goto out; + } + } + + end_count = 0; + while (1) { + consuming = 0; + + /* + * The basic idea is producer producing page and consumer writing page. + * Each producer have a page_flag_buf list which is used for storing page's description. + * The size of page_flag_buf is little so it won't take too much memory. + * And all producers will share a page_data_buf array which is used for storing page's compressed data. + * The main thread is the consumer. It will find the next pfn and write it into file. + * The next pfn is smallest pfn in all page_flag_buf. + */ + sem_wait(&info->page_flag_buf_sem); + gettimeofday(&last, NULL); + while (1) { + current_pfn = end_pfn; + + /* + * page_flag_buf is in circular linked list. + * The array info->page_flag_buf[] records the current page_flag_buf in each thread's + * page_flag_buf list. + * consuming is used for recording in which thread the pfn is the smallest. + * current_pfn is used for recording the value of pfn when checking the pfn. 
+ */ + for (i = 0; i < info->num_threads; i++) { + if (info->page_flag_buf[i]->ready == FLAG_UNUSED) + continue; + temp_pfn = info->page_flag_buf[i]->pfn; + + /* + * count how many threads have reached the end. + */ + if (temp_pfn >= end_pfn) { + info->page_flag_buf[i]->ready = FLAG_UNUSED; + end_count++; + continue; + } + + if (current_pfn < temp_pfn) + continue; + + consuming = i; + current_pfn = temp_pfn; + } + + /* + * If all the threads have reached the end, we will finish writing. + */ + if (end_count >= info->num_threads) + goto finish; + + /* + * If the page_flag_buf is not ready, the pfn recorded may be changed. + * So we should recheck. + */ + if (info->page_flag_buf[consuming]->ready != FLAG_READY) { + gettimeofday(&new, NULL); + if (new.tv_sec - last.tv_sec > WAIT_TIME) { + ERRMSG("Can't get data of pfn.\n"); + goto out; + } + continue; + } + + if (current_pfn == info->page_flag_buf[consuming]->pfn) + break; + } + + if ((num_dumped % per) == 0) + print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable, &tv_start); + + num_dumped++; + + + if (info->page_flag_buf[consuming]->zero == TRUE) { + if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) + goto out; + pfn_zero++; + } else { + index = info->page_flag_buf[consuming]->index; + pd.flags = page_data_buf[index].flags; + pd.size = page_data_buf[index].size; + pd.page_flags = 0; + pd.offset = *offset_data; + *offset_data += pd.size; + /* + * Write the page header. + */ + if (!write_cache(cd_header, &pd, sizeof(page_desc_t))) + goto out; + /* + * Write the page data. 
+ */ + if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) + goto out; + page_data_buf[index].used = FALSE; + } + info->page_flag_buf[consuming]->ready = FLAG_UNUSED; + info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; + } +finish: + ret = TRUE; + /* + * print [100 %] + */ + print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable, &tv_start); + print_execution_time(PROGRESS_COPY, &tv_start); + PROGRESS_MSG("\n"); + +out: + if (threads != NULL) { + for (i = 0; i < info->num_threads; i++) { + if (threads[i] != NULL) { + res = pthread_cancel(*threads[i]); + if (res != 0 && res != ESRCH) + ERRMSG("Can't cancel thread %d. %s\n", + i, strerror(res)); + } + } + + for (i = 0; i < info->num_threads; i++) { + if (threads[i] != NULL) { + res = pthread_join(*threads[i], &thread_result); + if (res != 0) + ERRMSG("Can't join with thread %d. %s\n", + i, strerror(res)); + + if (thread_result == PTHREAD_CANCELED) + DEBUG_MSG("Thread %d is cancelled.\n", i); + else if (thread_result == PTHREAD_FAIL) + DEBUG_MSG("Thread %d fails.\n", i); + else + DEBUG_MSG("Thread %d finishes.\n", i); + + } + } + } + + sem_destroy(&info->page_flag_buf_sem); + pthread_rwlock_destroy(&info->usemmap_rwlock); + pthread_mutex_destroy(&info->filter_mutex); + pthread_mutex_destroy(&info->current_pfn_mutex); + + return ret; +} + +int +write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_page, + struct page_desc *pd_zero, off_t *offset_data, struct cycle *cycle) +{ + mdf_pfn_t pfn, per; + mdf_pfn_t start_pfn, end_pfn; + unsigned long size_out; + struct page_desc pd; + unsigned char buf[info->page_size], *buf_out = NULL; + unsigned long len_buf_out; + struct timeval tv_start; + const off_t failed = (off_t)-1; + unsigned long len_buf_out_zlib, len_buf_out_lzo, len_buf_out_snappy; + + int ret = FALSE; + + if (info->flag_elf_dumpfile) + return FALSE; + + len_buf_out_zlib = len_buf_out_lzo = len_buf_out_snappy = 0; + +#ifdef USELZO + lzo_bytep wrkmem; + + 
if ((wrkmem = malloc(LZO1X_1_MEM_COMPRESS)) == NULL) { + ERRMSG("Can't allocate memory for the working memory. %s\n", + strerror(errno)); + goto out; + } + + len_buf_out_lzo = info->page_size + info->page_size / 16 + 64 + 3; +#endif +#ifdef USESNAPPY + len_buf_out_snappy = snappy_max_compressed_length(info->page_size); +#endif + + len_buf_out_zlib = compressBound(info->page_size); + + len_buf_out = MAX(len_buf_out_zlib, + MAX(len_buf_out_lzo, + len_buf_out_snappy)); + + if ((buf_out = malloc(len_buf_out)) == NULL) { + ERRMSG("Can't allocate memory for the compression buffer. %s\n", + strerror(errno)); + goto out; + } + + per = info->num_dumpable / 10000; + per = per ? per : 1; + + /* + * Set a fileoffset of Physical Address 0x0. + */ + if (lseek(info->fd_memory, get_offset_pt_load_memory(), SEEK_SET) + == failed) { + ERRMSG("Can't seek the dump memory(%s). %s\n", + info->name_memory, strerror(errno)); + goto out; + } + + start_pfn = cycle->start_pfn; + end_pfn = cycle->end_pfn; + + if (info->flag_split) { + if (start_pfn < info->split_start_pfn) + start_pfn = info->split_start_pfn; + if (end_pfn > info->split_end_pfn) + end_pfn = info->split_end_pfn; + } + + gettimeofday(&tv_start, NULL); + + for (pfn = start_pfn; pfn < end_pfn; pfn++) { + + if ((num_dumped % per) == 0) + print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable, &tv_start); + + /* + * Check the excluded page. + */ + if (!is_dumpable(info->bitmap2, pfn, cycle)) + continue; + + num_dumped++; + if (!read_pfn(pfn, buf)) + goto out; + + filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); + + /* + * Exclude the page filled with zeros. + */ + if ((info->dump_level & DL_EXCLUDE_ZERO) + && is_zero_page(buf, info->page_size)) { + if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) + goto out; + pfn_zero++; + continue; + } + /* + * Compress the page data. 
+ */ + size_out = len_buf_out; + if ((info->flag_compress & DUMP_DH_COMPRESSED_ZLIB) + && ((size_out = len_buf_out), + compress2(buf_out, &size_out, buf, info->page_size, + Z_BEST_SPEED) == Z_OK) + && (size_out < info->page_size)) { + pd.flags = DUMP_DH_COMPRESSED_ZLIB; + pd.size = size_out; +#ifdef USELZO + } else if (info->flag_lzo_support + && (info->flag_compress & DUMP_DH_COMPRESSED_LZO) + && ((size_out = info->page_size), + lzo1x_1_compress(buf, info->page_size, buf_out, + &size_out, wrkmem) == LZO_E_OK) + && (size_out < info->page_size)) { + pd.flags = DUMP_DH_COMPRESSED_LZO; + pd.size = size_out; +#endif +#ifdef USESNAPPY + } else if ((info->flag_compress & DUMP_DH_COMPRESSED_SNAPPY) + && ((size_out = len_buf_out_snappy), + snappy_compress((char *)buf, info->page_size, + (char *)buf_out, + (size_t *)&size_out) + == SNAPPY_OK) + && (size_out < info->page_size)) { + pd.flags = DUMP_DH_COMPRESSED_SNAPPY; + pd.size = size_out; +#endif + } else { + pd.flags = 0; + pd.size = info->page_size; + } + pd.page_flags = 0; + pd.offset = *offset_data; + *offset_data += pd.size; + + /* + * Write the page header and the page data + */ + if (!write_kdump_page(cd_header, cd_page, &pd, pd.flags ? buf_out : buf)) + goto out; + } + + ret = TRUE; +out: + if (buf_out != NULL) + free(buf_out); +#ifdef USELZO + if (wrkmem != NULL) + free(wrkmem); +#endif + + print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable, &tv_start); + print_execution_time(PROGRESS_COPY, &tv_start); + + return ret; +} + +/* + * Copy eraseinfo from input dumpfile/vmcore to output dumpfile. + */ +static int +copy_eraseinfo(struct cache_data *cd_eraseinfo) +{ + char *buf = NULL; + off_t offset; + unsigned long size; + int ret = FALSE; + + get_eraseinfo(&offset, &size); + buf = malloc(size); + if (buf == NULL) { + ERRMSG("Can't allocate memory for erase info section. 
%s\n", + strerror(errno)); + return FALSE; + } + if (lseek(info->fd_memory, offset, SEEK_SET) < 0) { + ERRMSG("Can't seek the dump memory(%s). %s\n", + info->name_memory, strerror(errno)); + goto out; + } + if (read(info->fd_memory, buf, size) != size) { + ERRMSG("Can't read the dump memory(%s). %s\n", + info->name_memory, strerror(errno)); + goto out; + } + if (!write_cache(cd_eraseinfo, buf, size)) + goto out; + ret = TRUE; +out: + if (buf) + free(buf); + return ret; +} + +static int +update_eraseinfo_of_sub_header(off_t offset_eraseinfo, + unsigned long size_eraseinfo) +{ + off_t offset; + + /* seek to kdump sub header offset */ + offset = DISKDUMP_HEADER_BLOCKS * info->page_size; + + info->sub_header.offset_eraseinfo = offset_eraseinfo; + info->sub_header.size_eraseinfo = size_eraseinfo; + + if (!write_buffer(info->fd_dumpfile, offset, &info->sub_header, + sizeof(struct kdump_sub_header), info->name_dumpfile)) + return FALSE; + + return TRUE; +} + +/* + * Traverse through eraseinfo nodes and write it to the o/p dumpfile if the + * node has erased flag set. + */ +int +write_eraseinfo(struct cache_data *cd_page, unsigned long *size_out) +{ + int i, j, obuf_size = 0, ei_size = 0; + int ret = FALSE; + unsigned long size_eraseinfo = 0; + char *obuf = NULL; + char size_str[MAX_SIZE_STR_LEN]; + + for (i = 1; i < num_erase_info; i++) { + if (!erase_info[i].erased) + continue; + for (j = 0; j < erase_info[i].num_sizes; j++) { + if (erase_info[i].sizes[j] > 0) + sprintf(size_str, "size %ld\n", + erase_info[i].sizes[j]); + else if (erase_info[i].sizes[j] == -1) + sprintf(size_str, "nullify\n"); + + /* Calculate the required buffer size. */ + ei_size = strlen("erase ") + + strlen(erase_info[i].symbol_expr) + 1 + + strlen(size_str) + + 1; + /* + * If obuf is allocated in the previous run and is + * big enough to hold current erase info string then + * reuse it otherwise realloc. 
+ */ + if (ei_size > obuf_size) { + obuf_size = ei_size; + obuf = realloc(obuf, obuf_size); + if (!obuf) { + ERRMSG("Can't allocate memory for" + " output buffer\n"); + return FALSE; + } + } + sprintf(obuf, "erase %s %s", erase_info[i].symbol_expr, + size_str); + DEBUG_MSG("%s", obuf); + if (!write_cache(cd_page, obuf, strlen(obuf))) + goto out; + size_eraseinfo += strlen(obuf); + } + } + /* + * Write the remainder. + */ + if (!write_cache_bufsz(cd_page)) + goto out; + + *size_out = size_eraseinfo; + ret = TRUE; +out: + if (obuf) + free(obuf); + + return ret; +} + +int +write_elf_eraseinfo(struct cache_data *cd_header) +{ + char note[MAX_SIZE_NHDR]; + char buf[ERASEINFO_NOTE_NAME_BYTES + 4]; + off_t offset_eraseinfo; + unsigned long note_header_size, size_written, size_note; + + DEBUG_MSG("erase info size: %lu\n", info->size_elf_eraseinfo); + + if (!info->size_elf_eraseinfo) + return TRUE; + + DEBUG_MSG("Writing erase info...\n"); + + /* calculate the eraseinfo ELF note offset */ + get_pt_note(NULL, &size_note); + cd_header->offset = info->offset_note_dumpfile + + roundup(size_note, 4); + + /* Write eraseinfo ELF note header. 
*/ + memset(note, 0, sizeof(note)); + if (is_elf64_memory()) { + Elf64_Nhdr *nh = (Elf64_Nhdr *)note; + + note_header_size = sizeof(Elf64_Nhdr); + nh->n_namesz = ERASEINFO_NOTE_NAME_BYTES; + nh->n_descsz = info->size_elf_eraseinfo; + nh->n_type = 0; + } else { + Elf32_Nhdr *nh = (Elf32_Nhdr *)note; + + note_header_size = sizeof(Elf32_Nhdr); + nh->n_namesz = ERASEINFO_NOTE_NAME_BYTES; + nh->n_descsz = info->size_elf_eraseinfo; + nh->n_type = 0; + } + if (!write_cache(cd_header, note, note_header_size)) + return FALSE; + + /* Write eraseinfo Note name */ + memset(buf, 0, sizeof(buf)); + memcpy(buf, ERASEINFO_NOTE_NAME, ERASEINFO_NOTE_NAME_BYTES); + if (!write_cache(cd_header, buf, + roundup(ERASEINFO_NOTE_NAME_BYTES, 4))) + return FALSE; + + offset_eraseinfo = cd_header->offset; + if (!write_eraseinfo(cd_header, &size_written)) + return FALSE; + + /* + * The actual eraseinfo written may be less than pre-calculated size. + * Hence fill up the rest of size with zero's. + */ + if (size_written < info->size_elf_eraseinfo) + write_cache_zero(cd_header, + info->size_elf_eraseinfo - size_written); + + DEBUG_MSG("offset_eraseinfo: %llx, size_eraseinfo: %ld\n", + (unsigned long long)offset_eraseinfo, info->size_elf_eraseinfo); + + return TRUE; +} + +int +write_kdump_eraseinfo(struct cache_data *cd_page) +{ + off_t offset_eraseinfo; + unsigned long size_eraseinfo, size_written; + + DEBUG_MSG("Writing erase info...\n"); + offset_eraseinfo = cd_page->offset; + + /* + * In case of refiltering copy the existing eraseinfo from input + * dumpfile to o/p dumpfile. 
+ */ + if (has_eraseinfo()) { + get_eraseinfo(NULL, &size_eraseinfo); + if (!copy_eraseinfo(cd_page)) + return FALSE; + } else + size_eraseinfo = 0; + + if (!write_eraseinfo(cd_page, &size_written)) + return FALSE; + + size_eraseinfo += size_written; + DEBUG_MSG("offset_eraseinfo: %llx, size_eraseinfo: %ld\n", + (unsigned long long)offset_eraseinfo, size_eraseinfo); + + if (size_eraseinfo) + /* Update the erase info offset and size in kdump sub header */ + if (!update_eraseinfo_of_sub_header(offset_eraseinfo, + size_eraseinfo)) + return FALSE; + + return TRUE; +} + +int +write_kdump_bitmap_file(struct dump_bitmap *bitmap) +{ + struct cache_data bm; + long long buf_size; + off_t offset; + + int ret = FALSE; + + if (info->flag_elf_dumpfile) + return FALSE; + + bm.fd = info->fd_bitmap; + bm.file_name = info->name_bitmap; + bm.offset = bitmap->offset; + bm.buf = NULL; + + if ((bm.buf = calloc(1, BUFSIZE_BITMAP)) == NULL) { + ERRMSG("Can't allocate memory for dump bitmap buffer. %s\n", + strerror(errno)); + goto out; + } + buf_size = info->len_bitmap / 2; + offset = info->offset_bitmap1 + bitmap->offset; + while (buf_size > 0) { + if (buf_size >= BUFSIZE_BITMAP) + bm.cache_size = BUFSIZE_BITMAP; + else + bm.cache_size = buf_size; + + if(!read_cache(&bm)) + goto out; + + if (!write_buffer(info->fd_dumpfile, offset, + bm.buf, bm.cache_size, info->name_dumpfile)) + goto out; + + offset += bm.cache_size; + buf_size -= BUFSIZE_BITMAP; + } + ret = TRUE; +out: + if (bm.buf != NULL) + free(bm.buf); + + return ret; +} + +int +write_kdump_bitmap1_file(void) +{ + return write_kdump_bitmap_file(info->bitmap1); +} + +int +write_kdump_bitmap2_file(void) +{ + return write_kdump_bitmap_file(info->bitmap2); +} + +int +write_kdump_bitmap1_buffer(struct cycle *cycle) +{ + off_t offset; + int increment; + int ret = FALSE; + + increment = divideup(cycle->end_pfn - cycle->start_pfn, BITPERBYTE); + + if (info->flag_elf_dumpfile) + return FALSE; + + offset = info->offset_bitmap1; + if 
(!write_buffer(info->fd_dumpfile, offset + info->bufsize_cyclic * + (cycle->start_pfn / info->pfn_cyclic), + info->bitmap1->buf, increment, info->name_dumpfile)) + goto out; + + ret = TRUE; +out: + return ret; +} + +int +write_kdump_bitmap2_buffer(struct cycle *cycle) +{ + off_t offset; + int increment; + int ret = FALSE; + + increment = divideup(cycle->end_pfn - cycle->start_pfn, + BITPERBYTE); + + if (info->flag_elf_dumpfile) + return FALSE; + + offset = info->offset_bitmap1; + offset += info->len_bitmap / 2; + if (!write_buffer(info->fd_dumpfile, offset, + info->bitmap2->buf, increment, info->name_dumpfile)) + goto out; + + info->offset_bitmap1 += increment; + + ret = TRUE; +out: + + return ret; +} + +int +write_kdump_bitmap1(struct cycle *cycle) { + if (info->bitmap1->fd >= 0) { + return write_kdump_bitmap1_file(); + } else { + return write_kdump_bitmap1_buffer(cycle); + } +} + +int +write_kdump_bitmap2(struct cycle *cycle) { + if (info->bitmap2->fd >= 0) { + return write_kdump_bitmap2_file(); + } else { + return write_kdump_bitmap2_buffer(cycle); + } +} + +int +write_kdump_pages_and_bitmap_cyclic(struct cache_data *cd_header, struct cache_data *cd_page) +{ + struct page_desc pd_zero; + off_t offset_data=0; + struct disk_dump_header *dh = info->dump_header; + unsigned char buf[info->page_size]; + struct timeval tv_start; + + cd_header->offset + = (DISKDUMP_HEADER_BLOCKS + dh->sub_hdr_size + dh->bitmap_blocks) + * dh->block_size; + cd_page->offset = cd_header->offset + sizeof(page_desc_t)*info->num_dumpable; + offset_data = cd_page->offset; + + /* + * Write the data of zero-filled page. + */ + if (info->dump_level & DL_EXCLUDE_ZERO) { + pd_zero.size = info->page_size; + pd_zero.flags = 0; + pd_zero.offset = offset_data; + pd_zero.page_flags = 0; + memset(buf, 0, pd_zero.size); + if (!write_cache(cd_page, buf, pd_zero.size)) + return FALSE; + offset_data += pd_zero.size; + } + + if (info->flag_cyclic) { + /* + * Reset counter for debug message. 
+ */ + pfn_zero = pfn_cache = pfn_cache_private = 0; + pfn_user = pfn_free = pfn_hwpoison = 0; + pfn_memhole = info->max_mapnr; + + /* + * Write the 1st bitmap + */ + if (!prepare_bitmap1_buffer()) + return FALSE; + } + + struct cycle cycle = {0}; + for_each_cycle(0, info->max_mapnr, &cycle) + { + if (info->flag_cyclic) { + if (!create_1st_bitmap(&cycle)) + return FALSE; + } + if (!write_kdump_bitmap1(&cycle)) + return FALSE; + } + + free_bitmap1_buffer(); + if (info->flag_cyclic) { + if (!prepare_bitmap2_buffer()) + return FALSE; + } + + /* + * Write pages and bitmap cyclically. + */ + //cycle = {0, 0}; + memset(&cycle, 0, sizeof(struct cycle)); + for_each_cycle(0, info->max_mapnr, &cycle) + { + if (info->flag_cyclic) { + if (!create_2nd_bitmap(&cycle)) + return FALSE; + } + + if (!write_kdump_bitmap2(&cycle)) + return FALSE; + + if (info->num_threads) { + if (!write_kdump_pages_parallel_cyclic(cd_header, + cd_page, &pd_zero, + &offset_data, &cycle)) + return FALSE; + } else { + if (!write_kdump_pages_cyclic(cd_header, cd_page, &pd_zero, + &offset_data, &cycle)) + return FALSE; + } + } + free_bitmap2_buffer(); + + gettimeofday(&tv_start, NULL); + + /* + * Write the remainder. + */ + if (!write_cache_bufsz(cd_page)) + return FALSE; + if (!write_cache_bufsz(cd_header)) + return FALSE; + + /* + * print [100 %] + */ + print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable, &tv_start); + print_execution_time(PROGRESS_COPY, &tv_start); + PROGRESS_MSG("\n"); + + return TRUE; +} + +void +close_vmcoreinfo(void) +{ + if(fclose(info->file_vmcoreinfo) < 0) + ERRMSG("Can't close the vmcoreinfo file(%s). %s\n", + info->name_vmcoreinfo, strerror(errno)); + info->file_vmcoreinfo = NULL; +} + +void +close_dump_memory(void) +{ + if (close(info->fd_memory) < 0) + ERRMSG("Can't close the dump memory(%s). 
%s\n", + info->name_memory, strerror(errno)); + info->fd_memory = -1; +} + +void +close_dump_file(void) +{ + if (info->flag_flatten) + return; + + if (close(info->fd_dumpfile) < 0) + ERRMSG("Can't close the dump file(%s). %s\n", + info->name_dumpfile, strerror(errno)); + info->fd_dumpfile = -1; +} + +void +close_dump_bitmap(void) +{ + if (info->fd_bitmap < 0) + return; + + if (close(info->fd_bitmap) < 0) + ERRMSG("Can't close the bitmap file(%s). %s\n", + info->name_bitmap, strerror(errno)); + info->fd_bitmap = -1; + free(info->name_bitmap); + info->name_bitmap = NULL; +} + +void +close_kernel_file(void) +{ + if (info->name_vmlinux) { + if (close(info->fd_vmlinux) < 0) { + ERRMSG("Can't close the kernel file(%s). %s\n", + info->name_vmlinux, strerror(errno)); + } + info->fd_vmlinux = -1; + } + if (info->name_xen_syms) { + if (close(info->fd_xen_syms) < 0) { + ERRMSG("Can't close the kernel file(%s). %s\n", + info->name_xen_syms, strerror(errno)); + } + info->fd_xen_syms = -1; + } +} + +/* + * Close the following files when it generates the vmcoreinfo file. + * - vmlinux + * - vmcoreinfo file + */ +int +close_files_for_generating_vmcoreinfo(void) +{ + close_kernel_file(); + + close_vmcoreinfo(); + + return TRUE; +} + +/* + * Close the following file when it rearranges the dump data. + * - dump file + */ +int +close_files_for_rearranging_dumpdata(void) +{ + close_dump_file(); + + return TRUE; +} + +/* + * Close the following files when it creates the dump file. 
+ * - dump mem
+ * - bit map
+ * if it reads the vmcoreinfo file
+ * - vmcoreinfo file
+ * else
+ * - vmlinux
+ */
+int
+close_files_for_creating_dumpfile(void)
+{
+ /* NOTE(review): the vmlinux/xen-syms fd appears to be needed only for
+ * dump levels above DL_EXCLUDE_ZERO -- confirm against open path */
+ if (info->max_dump_level > DL_EXCLUDE_ZERO)
+ close_kernel_file();
+
+ /* free name for vmcoreinfo */
+ if (has_vmcoreinfo()) {
+ free(info->name_vmcoreinfo);
+ info->name_vmcoreinfo = NULL;
+ }
+ close_dump_memory();
+
+ close_dump_bitmap();
+
+ /* unconditional success: the close helpers only log errors */
+ return TRUE;
+}
+
+/*
+ * for Xen extraction
+ */
+/*
+ * Resolve the Xen symbols used by the extraction code. Symbols that are
+ * not present presumably stay NOT_FOUND_SYMBOL (callers test for that),
+ * so a missing arch-specific symbol is not an error here.
+ */
+int
+get_symbol_info_xen(void)
+{
+ /*
+ * Common symbol
+ */
+ SYMBOL_INIT(dom_xen, "dom_xen");
+ SYMBOL_INIT(dom_io, "dom_io");
+ SYMBOL_INIT(domain_list, "domain_list");
+ SYMBOL_INIT(frame_table, "frame_table");
+ SYMBOL_INIT(alloc_bitmap, "alloc_bitmap");
+ SYMBOL_INIT(max_page, "max_page");
+ SYMBOL_INIT(xenheap_phys_end, "xenheap_phys_end");
+
+ /*
+ * Architecture specific
+ */
+ SYMBOL_INIT(pgd_l2, "idle_pg_table_l2"); /* x86 */
+ SYMBOL_INIT(pgd_l3, "idle_pg_table_l3"); /* x86-PAE */
+ if (SYMBOL(pgd_l3) == NOT_FOUND_SYMBOL)
+ SYMBOL_INIT(pgd_l3, "idle_pg_table"); /* x86-PAE */
+ SYMBOL_INIT(pgd_l4, "idle_pg_table_4"); /* x86_64 */
+ if (SYMBOL(pgd_l4) == NOT_FOUND_SYMBOL)
+ SYMBOL_INIT(pgd_l4, "idle_pg_table"); /* x86_64 */
+
+ SYMBOL_INIT(xen_heap_start, "xen_heap_start"); /* ia64 */
+ SYMBOL_INIT(xen_pstart, "xen_pstart"); /* ia64 */
+ SYMBOL_INIT(frametable_pg_dir, "frametable_pg_dir"); /* ia64 */
+
+ return TRUE;
+}
+
+/*
+ * Record the sizes/offsets of the Xen page_info and domain structures
+ * used later when walking frame_table and domain_list.
+ */
+int
+get_structure_info_xen(void)
+{
+ SIZE_INIT(page_info, "page_info");
+ OFFSET_INIT(page_info.count_info, "page_info", "count_info");
+ OFFSET_INIT(page_info._domain, "page_info", "_domain");
+
+ SIZE_INIT(domain, "domain");
+ OFFSET_INIT(domain.domain_id, "domain", "domain_id");
+ OFFSET_INIT(domain.next_in_list, "domain", "next_in_list");
+
+ return TRUE;
+}
+
+int
+init_xen_crash_info(void)
+{
+ off_t offset_xen_crash_info;
+ unsigned long size_xen_crash_info;
+ void *buf;
+
+ get_xen_crash_info(&offset_xen_crash_info, &size_xen_crash_info);
+ if (!size_xen_crash_info) {
+ 
info->xen_crash_info_v = -1; + return TRUE; /* missing info is non-fatal */ + } + + if (size_xen_crash_info < sizeof(xen_crash_info_com_t)) { + ERRMSG("Xen crash info too small (%lu bytes).\n", + size_xen_crash_info); + return FALSE; + } + + buf = malloc(size_xen_crash_info); + if (!buf) { + ERRMSG("Can't allocate note (%lu bytes). %s\n", + size_xen_crash_info, strerror(errno)); + return FALSE; + } + + if (lseek(info->fd_memory, offset_xen_crash_info, SEEK_SET) < 0) { + ERRMSG("Can't seek the dump memory(%s). %s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + if (read(info->fd_memory, buf, size_xen_crash_info) + != size_xen_crash_info) { + ERRMSG("Can't read the dump memory(%s). %s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + + info->xen_crash_info.com = buf; + if (size_xen_crash_info >= sizeof(xen_crash_info_v2_t)) + info->xen_crash_info_v = 2; + else if (size_xen_crash_info >= sizeof(xen_crash_info_t)) + info->xen_crash_info_v = 1; + else + info->xen_crash_info_v = 0; + + return TRUE; +} + +int +get_xen_info(void) +{ + unsigned long domain; + unsigned int domain_id; + int num_domain; + + /* + * Get architecture specific basic data + */ + if (!get_xen_basic_info_arch()) + return FALSE; + + if (!info->xen_crash_info.com || + info->xen_crash_info.com->xen_major_version < 4) { + if (SYMBOL(alloc_bitmap) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get the symbol of alloc_bitmap.\n"); + return FALSE; + } + if (!readmem(VADDR_XEN, SYMBOL(alloc_bitmap), &info->alloc_bitmap, + sizeof(info->alloc_bitmap))) { + ERRMSG("Can't get the value of alloc_bitmap.\n"); + return FALSE; + } + if (SYMBOL(max_page) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get the symbol of max_page.\n"); + return FALSE; + } + if (!readmem(VADDR_XEN, SYMBOL(max_page), &info->max_page, + sizeof(info->max_page))) { + ERRMSG("Can't get the value of max_page.\n"); + return FALSE; + } + } + + /* + * Walk through domain_list + */ + if (SYMBOL(domain_list) == NOT_FOUND_SYMBOL) { + 
ERRMSG("Can't get the symbol of domain_list.\n"); + return FALSE; + } + if (!readmem(VADDR_XEN, SYMBOL(domain_list), &domain, sizeof(domain))){ + ERRMSG("Can't get the value of domain_list.\n"); + return FALSE; + } + + /* + * Get numbers of domain first + */ + num_domain = 0; + while (domain) { + num_domain++; + if (!readmem(VADDR_XEN, domain + OFFSET(domain.next_in_list), + &domain, sizeof(domain))) { + ERRMSG("Can't get through the domain_list.\n"); + return FALSE; + } + } + + if ((info->domain_list = (struct domain_list *) + malloc(sizeof(struct domain_list) * (num_domain + 2))) == NULL) { + ERRMSG("Can't allocate memory for domain_list.\n"); + return FALSE; + } + + info->num_domain = num_domain + 2; + + if (!readmem(VADDR_XEN, SYMBOL(domain_list), &domain, sizeof(domain))) { + ERRMSG("Can't get the value of domain_list.\n"); + return FALSE; + } + num_domain = 0; + while (domain) { + if (!readmem(VADDR_XEN, domain + OFFSET(domain.domain_id), + &domain_id, sizeof(domain_id))) { + ERRMSG("Can't get the domain_id.\n"); + return FALSE; + } + info->domain_list[num_domain].domain_addr = domain; + info->domain_list[num_domain].domain_id = domain_id; + /* + * pickled_id is set by architecture specific + */ + num_domain++; + + if (!readmem(VADDR_XEN, domain + OFFSET(domain.next_in_list), + &domain, sizeof(domain))) { + ERRMSG("Can't get through the domain_list.\n"); + return FALSE; + } + } + + /* + * special domains + */ + if (SYMBOL(dom_xen) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get the symbol of dom_xen.\n"); + return FALSE; + } + if (!readmem(VADDR_XEN, SYMBOL(dom_xen), &domain, sizeof(domain))) { + ERRMSG("Can't get the value of dom_xen.\n"); + return FALSE; + } + if (!readmem(VADDR_XEN, domain + OFFSET(domain.domain_id), &domain_id, + sizeof(domain_id))) { + ERRMSG( "Can't get the value of dom_xen domain_id.\n"); + return FALSE; + } + info->domain_list[num_domain].domain_addr = domain; + info->domain_list[num_domain].domain_id = domain_id; + num_domain++; + + if 
(SYMBOL(dom_io) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get the symbol of dom_io.\n"); + return FALSE; + } + if (!readmem(VADDR_XEN, SYMBOL(dom_io), &domain, sizeof(domain))) { + ERRMSG("Can't get the value of dom_io.\n"); + return FALSE; + } + if (!readmem(VADDR_XEN, domain + OFFSET(domain.domain_id), &domain_id, + sizeof(domain_id))) { + ERRMSG( "Can't get the value of dom_io domain_id.\n"); + return FALSE; + } + info->domain_list[num_domain].domain_addr = domain; + info->domain_list[num_domain].domain_id = domain_id; + + /* + * Get architecture specific data + */ + if (!get_xen_info_arch()) + return FALSE; + + return TRUE; +} + +void +show_data_xen(void) +{ + int i; + + /* + * Show data for debug + */ + MSG("\n"); + MSG("SYMBOL(dom_xen): %llx\n", SYMBOL(dom_xen)); + MSG("SYMBOL(dom_io): %llx\n", SYMBOL(dom_io)); + MSG("SYMBOL(domain_list): %llx\n", SYMBOL(domain_list)); + MSG("SYMBOL(xen_heap_start): %llx\n", SYMBOL(xen_heap_start)); + MSG("SYMBOL(frame_table): %llx\n", SYMBOL(frame_table)); + MSG("SYMBOL(alloc_bitmap): %llx\n", SYMBOL(alloc_bitmap)); + MSG("SYMBOL(max_page): %llx\n", SYMBOL(max_page)); + MSG("SYMBOL(pgd_l2): %llx\n", SYMBOL(pgd_l2)); + MSG("SYMBOL(pgd_l3): %llx\n", SYMBOL(pgd_l3)); + MSG("SYMBOL(pgd_l4): %llx\n", SYMBOL(pgd_l4)); + MSG("SYMBOL(xenheap_phys_end): %llx\n", SYMBOL(xenheap_phys_end)); + MSG("SYMBOL(xen_pstart): %llx\n", SYMBOL(xen_pstart)); + MSG("SYMBOL(frametable_pg_dir): %llx\n", SYMBOL(frametable_pg_dir)); + + MSG("SIZE(page_info): %ld\n", SIZE(page_info)); + MSG("OFFSET(page_info.count_info): %ld\n", OFFSET(page_info.count_info)); + MSG("OFFSET(page_info._domain): %ld\n", OFFSET(page_info._domain)); + MSG("SIZE(domain): %ld\n", SIZE(domain)); + MSG("OFFSET(domain.domain_id): %ld\n", OFFSET(domain.domain_id)); + MSG("OFFSET(domain.next_in_list): %ld\n", OFFSET(domain.next_in_list)); + + MSG("\n"); + if (info->xen_crash_info.com) { + MSG("xen_major_version: %lx\n", + info->xen_crash_info.com->xen_major_version); + 
MSG("xen_minor_version: %lx\n", + info->xen_crash_info.com->xen_minor_version); + } + MSG("xen_phys_start: %lx\n", info->xen_phys_start); + MSG("frame_table_vaddr: %lx\n", info->frame_table_vaddr); + MSG("xen_heap_start: %lx\n", info->xen_heap_start); + MSG("xen_heap_end:%lx\n", info->xen_heap_end); + MSG("alloc_bitmap: %lx\n", info->alloc_bitmap); + MSG("max_page: %lx\n", info->max_page); + MSG("num_domain: %d\n", info->num_domain); + for (i = 0; i < info->num_domain; i++) { + MSG(" %u: %x: %lx\n", info->domain_list[i].domain_id, + info->domain_list[i].pickled_id, + info->domain_list[i].domain_addr); + } +} + +int +generate_vmcoreinfo_xen(void) +{ + if ((info->page_size = sysconf(_SC_PAGE_SIZE)) <= 0) { + ERRMSG("Can't get the size of page.\n"); + return FALSE; + } + set_dwarf_debuginfo("xen-syms", NULL, + info->name_xen_syms, info->fd_xen_syms); + + if (!get_symbol_info_xen()) + return FALSE; + + if (!get_structure_info_xen()) + return FALSE; + + /* + * write 1st kernel's PAGESIZE + */ + fprintf(info->file_vmcoreinfo, "%s%ld\n", STR_PAGESIZE, + info->page_size); + + /* + * write the symbol of 1st kernel + */ + WRITE_SYMBOL("dom_xen", dom_xen); + WRITE_SYMBOL("dom_io", dom_io); + WRITE_SYMBOL("domain_list", domain_list); + WRITE_SYMBOL("xen_heap_start", xen_heap_start); + WRITE_SYMBOL("frame_table", frame_table); + WRITE_SYMBOL("alloc_bitmap", alloc_bitmap); + WRITE_SYMBOL("max_page", max_page); + WRITE_SYMBOL("pgd_l2", pgd_l2); + WRITE_SYMBOL("pgd_l3", pgd_l3); + WRITE_SYMBOL("pgd_l4", pgd_l4); + WRITE_SYMBOL("xenheap_phys_end", xenheap_phys_end); + WRITE_SYMBOL("xen_pstart", xen_pstart); + WRITE_SYMBOL("frametable_pg_dir", frametable_pg_dir); + + /* + * write the structure size of 1st kernel + */ + WRITE_STRUCTURE_SIZE("page_info", page_info); + WRITE_STRUCTURE_SIZE("domain", domain); + + /* + * write the member offset of 1st kernel + */ + WRITE_MEMBER_OFFSET("page_info.count_info", page_info.count_info); + WRITE_MEMBER_OFFSET("page_info._domain", 
page_info._domain); + WRITE_MEMBER_OFFSET("domain.domain_id", domain.domain_id); + WRITE_MEMBER_OFFSET("domain.next_in_list", domain.next_in_list); + + return TRUE; +} + +int +read_vmcoreinfo_basic_info_xen(void) +{ + long page_size = FALSE; + char buf[BUFSIZE_FGETS], *endp; + unsigned int i; + + if (fseek(info->file_vmcoreinfo, 0, SEEK_SET) < 0) { + ERRMSG("Can't seek the vmcoreinfo file(%s). %s\n", + info->name_vmcoreinfo, strerror(errno)); + return FALSE; + } + + while (fgets(buf, BUFSIZE_FGETS, info->file_vmcoreinfo)) { + i = strlen(buf); + if (!i) + break; + if (buf[i - 1] == '\n') + buf[i - 1] = '\0'; + if (strncmp(buf, STR_PAGESIZE, strlen(STR_PAGESIZE)) == 0) { + page_size = strtol(buf+strlen(STR_PAGESIZE),&endp,10); + if ((!page_size || page_size == LONG_MAX) + || strlen(endp) != 0) { + ERRMSG("Invalid data in %s: %s", + info->name_vmcoreinfo, buf); + return FALSE; + } + if (!set_page_size(page_size)) { + ERRMSG("Invalid data in %s: %s", + info->name_vmcoreinfo, buf); + return FALSE; + } + break; + } + } + if (!info->page_size) { + ERRMSG("Invalid format in %s", info->name_vmcoreinfo); + return FALSE; + } + return TRUE; +} + +int +read_vmcoreinfo_xen(void) +{ + if (!read_vmcoreinfo_basic_info_xen()) + return FALSE; + + READ_SYMBOL("dom_xen", dom_xen); + READ_SYMBOL("dom_io", dom_io); + READ_SYMBOL("domain_list", domain_list); + READ_SYMBOL("xen_heap_start", xen_heap_start); + READ_SYMBOL("frame_table", frame_table); + READ_SYMBOL("alloc_bitmap", alloc_bitmap); + READ_SYMBOL("max_page", max_page); + READ_SYMBOL("pgd_l2", pgd_l2); + READ_SYMBOL("pgd_l3", pgd_l3); + READ_SYMBOL("pgd_l4", pgd_l4); + READ_SYMBOL("xenheap_phys_end", xenheap_phys_end); + READ_SYMBOL("xen_pstart", xen_pstart); + READ_SYMBOL("frametable_pg_dir", frametable_pg_dir); + + READ_STRUCTURE_SIZE("page_info", page_info); + READ_STRUCTURE_SIZE("domain", domain); + + READ_MEMBER_OFFSET("page_info.count_info", page_info.count_info); + READ_MEMBER_OFFSET("page_info._domain", 
 page_info._domain);
+ READ_MEMBER_OFFSET("domain.domain_id", domain.domain_id);
+ READ_MEMBER_OFFSET("domain.next_in_list", domain.next_in_list);
+
+ return TRUE;
+}
+
+/*
+ * Test whether @pfn is marked allocated in Xen's alloc_bitmap.
+ * Caches the most recently read bitmap word in static cur_idx/cur_word,
+ * so sequential pfn scans re-read memory only once per word.
+ * NOTE: on readmem failure this returns 0, i.e. the page is silently
+ * treated as unallocated (and will be excluded by the caller).
+ * Not reentrant because of the static cache.
+ */
+int
+allocated_in_map(mdf_pfn_t pfn)
+{
+ static unsigned long long cur_idx = -1;
+ static unsigned long cur_word;
+ unsigned long long idx;
+
+ idx = pfn / PAGES_PER_MAPWORD;
+ if (idx != cur_idx) {
+ if (!readmem(VADDR_XEN,
+ info->alloc_bitmap + idx * sizeof(unsigned long),
+ &cur_word, sizeof(cur_word))) {
+ ERRMSG("Can't access alloc_bitmap.\n");
+ return 0;
+ }
+ cur_idx = idx;
+ }
+
+ return !!(cur_word & (1UL << (pfn & (PAGES_PER_MAPWORD - 1))));
+}
+
+/*
+ * Return TRUE when pickled id @id belongs to the selected domain.
+ * Currently only dom0 (domain_id == 0) can be selected, per the
+ * original note below.
+ */
+int
+is_select_domain(unsigned int id)
+{
+ int i;
+
+ /* selected domain is fix to dom0 only now !!
+ (yes... domain_list is not necessary right now,
+ it can get from "dom0" directly) */
+
+ for (i = 0; i < info->num_domain; i++) {
+ if (info->domain_list[i].domain_id == 0 &&
+ info->domain_list[i].pickled_id == id)
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+int
+exclude_xen3_user_domain(void)
+{
+ int i;
+ unsigned int count_info, _domain;
+ unsigned int num_pt_loads = get_num_pt_loads();
+ unsigned long page_info_addr;
+ unsigned long long phys_start, phys_end;
+ mdf_pfn_t pfn, pfn_end;
+ mdf_pfn_t j, size;
+
+ /*
+ * NOTE: the first half of bitmap is not used for Xen extraction
+ */
+ for (i = 0; get_pt_load(i, &phys_start, &phys_end, NULL, NULL); i++) {
+
+ print_progress(PROGRESS_XEN_DOMAIN, i, num_pt_loads, NULL);
+
+ pfn = paddr_to_pfn(phys_start);
+ pfn_end = paddr_to_pfn(roundup(phys_end, PAGESIZE()));
+ size = pfn_end - pfn;
+
+ for (j = 0; pfn < pfn_end; pfn++, j++) {
+ print_progress(PROGRESS_XEN_DOMAIN, j + (size * i),
+ size * num_pt_loads, NULL);
+
+ if (!allocated_in_map(pfn)) {
+ clear_bit_on_2nd_bitmap(pfn, NULL);
+ continue;
+ }
+
+ page_info_addr = info->frame_table_vaddr + pfn * SIZE(page_info);
+ if (!readmem(VADDR_XEN,
+ page_info_addr + OFFSET(page_info.count_info),
+ &count_info, sizeof(count_info))) {
+ 
clear_bit_on_2nd_bitmap(pfn, NULL); + continue; /* page_info may not exist */ + } + if (!readmem(VADDR_XEN, + page_info_addr + OFFSET(page_info._domain), + &_domain, sizeof(_domain))) { + ERRMSG("Can't get page_info._domain.\n"); + return FALSE; + } + /* + * select: + * - anonymous (_domain == 0), or + * - xen heap area, or + * - selected domain page + */ + if (_domain == 0) + continue; + if (info->xen_heap_start <= pfn && pfn < info->xen_heap_end) + continue; + if ((count_info & 0xffff) && is_select_domain(_domain)) + continue; + clear_bit_on_2nd_bitmap(pfn, NULL); + } + } + + return TRUE; +} + +int +exclude_xen4_user_domain(void) +{ + int i; + unsigned long count_info; + unsigned int _domain; + unsigned int num_pt_loads = get_num_pt_loads(); + unsigned long page_info_addr; + unsigned long long phys_start, phys_end; + mdf_pfn_t pfn, pfn_end; + mdf_pfn_t j, size; + + /* + * NOTE: the first half of bitmap is not used for Xen extraction + */ + for (i = 0; get_pt_load(i, &phys_start, &phys_end, NULL, NULL); i++) { + + print_progress(PROGRESS_XEN_DOMAIN, i, num_pt_loads, NULL); + + pfn = paddr_to_pfn(phys_start); + pfn_end = paddr_to_pfn(roundup(phys_end, PAGESIZE())); + size = pfn_end - pfn; + + for (j = 0; pfn < pfn_end; pfn++, j++) { + print_progress(PROGRESS_XEN_DOMAIN, j + (size * i), + size * num_pt_loads, NULL); + + page_info_addr = info->frame_table_vaddr + pfn * SIZE(page_info); + if (!readmem(VADDR_XEN, + page_info_addr + OFFSET(page_info.count_info), + &count_info, sizeof(count_info))) { + clear_bit_on_2nd_bitmap(pfn, NULL); + continue; /* page_info may not exist */ + } + + /* always keep Xen heap pages */ + if (count_info & PGC_xen_heap) + continue; + + /* delete free, offlined and broken pages */ + if (page_state_is(count_info, free) || + page_state_is(count_info, offlined) || + count_info & PGC_broken) { + clear_bit_on_2nd_bitmap(pfn, NULL); + continue; + } + + /* keep inuse pages not allocated to any domain + * this covers e.g. 
Xen static data + */ + if (! (count_info & PGC_allocated)) + continue; + + /* Need to check the domain + * keep: + * - anonymous (_domain == 0), or + * - selected domain page + */ + if (!readmem(VADDR_XEN, + page_info_addr + OFFSET(page_info._domain), + &_domain, sizeof(_domain))) { + ERRMSG("Can't get page_info._domain.\n"); + return FALSE; + } + + if (_domain == 0) + continue; + if (is_select_domain(_domain)) + continue; + clear_bit_on_2nd_bitmap(pfn, NULL); + } + } + + return TRUE; +} + +int +exclude_xen_user_domain(void) +{ + struct timeval tv_start; + int ret; + + gettimeofday(&tv_start, NULL); + + if (info->xen_crash_info.com && + info->xen_crash_info.com->xen_major_version >= 4) + ret = exclude_xen4_user_domain(); + else + ret = exclude_xen3_user_domain(); + + /* + * print [100 %] + */ + print_progress(PROGRESS_XEN_DOMAIN, 1, 1, NULL); + print_execution_time(PROGRESS_XEN_DOMAIN, &tv_start); + + return ret; +} + +int +initial_xen(void) +{ +#if defined(__powerpc64__) || defined(__powerpc32__) + MSG("\n"); + MSG("Xen is not supported on powerpc.\n"); + return FALSE; +#else + int xen_info_required = TRUE; + off_t offset; + unsigned long size; + +#ifndef __x86_64__ + if (DL_EXCLUDE_ZERO < info->max_dump_level) { + MSG("Dump_level is invalid. 
It should be 0 or 1.\n"); + MSG("Commandline parameter is invalid.\n"); + MSG("Try `makedumpfile --help' for more information.\n"); + return FALSE; + } +#endif + if (is_xen_memory()) { + if(info->flag_cyclic) { + info->flag_cyclic = FALSE; + } + } + + if (!init_xen_crash_info()) + return FALSE; + /* + * Get the debug information for analysis from the vmcoreinfo file + */ + if (info->flag_read_vmcoreinfo) { + if (!read_vmcoreinfo_xen()) + return FALSE; + close_vmcoreinfo(); + /* + * Get the debug information for analysis from the xen-syms file + */ + } else if (info->name_xen_syms) { + set_dwarf_debuginfo("xen-syms", NULL, + info->name_xen_syms, info->fd_xen_syms); + + if (!get_symbol_info_xen()) + return FALSE; + if (!get_structure_info_xen()) + return FALSE; + /* + * Get the debug information for analysis from /proc/vmcore + */ + } else { + /* + * Check whether /proc/vmcore contains vmcoreinfo, + * and get both the offset and the size. + */ + if (!has_vmcoreinfo_xen()){ + if (!info->flag_exclude_xen_dom) { + xen_info_required = FALSE; + goto out; + } + + MSG("%s doesn't contain a vmcoreinfo for Xen.\n", + info->name_memory); + MSG("Specify '--xen-syms' option or '--xen-vmcoreinfo' option.\n"); + MSG("Commandline parameter is invalid.\n"); + MSG("Try `makedumpfile --help' for more information.\n"); + return FALSE; + } + /* + * Get the debug information from /proc/vmcore + */ + get_vmcoreinfo_xen(&offset, &size); + if (!read_vmcoreinfo_from_vmcore(offset, size, TRUE)) + return FALSE; + } + +out: + if (!info->page_size) { + /* + * If we cannot get page_size from a vmcoreinfo file, + * fall back to the current kernel page size. 
+ */ + if (!fallback_to_current_page_size()) + return FALSE; + } + + if (!cache_init()) + return FALSE; + + if (xen_info_required == TRUE) { + if (!get_xen_info()) + return FALSE; + + if (message_level & ML_PRINT_DEBUG_MSG) + show_data_xen(); + } + + if (!get_max_mapnr()) + return FALSE; + + return TRUE; +#endif +} + +void +print_vtop(void) +{ + unsigned long long paddr; + + if (!info->vaddr_for_vtop) + return; + + MSG("\n"); + MSG("Translating virtual address %lx to physical address.\n", info->vaddr_for_vtop); + + paddr = vaddr_to_paddr(info->vaddr_for_vtop); + + MSG("VIRTUAL PHYSICAL\n"); + MSG("%16lx %llx\n", info->vaddr_for_vtop, paddr); + MSG("\n"); + + info->vaddr_for_vtop = 0; + + return; +} + +void +print_report(void) +{ + mdf_pfn_t pfn_original, pfn_excluded, shrinking; + + /* + * /proc/vmcore doesn't contain the memory hole area. + */ + pfn_original = info->max_mapnr - pfn_memhole; + + pfn_excluded = pfn_zero + pfn_cache + pfn_cache_private + + pfn_user + pfn_free + pfn_hwpoison; + shrinking = (pfn_original - pfn_excluded) * 100; + shrinking = shrinking / pfn_original; + + REPORT_MSG("\n"); + REPORT_MSG("Original pages : 0x%016llx\n", pfn_original); + REPORT_MSG(" Excluded pages : 0x%016llx\n", pfn_excluded); + REPORT_MSG(" Pages filled with zero : 0x%016llx\n", pfn_zero); + REPORT_MSG(" Non-private cache pages : 0x%016llx\n", pfn_cache); + REPORT_MSG(" Private cache pages : 0x%016llx\n", + pfn_cache_private); + REPORT_MSG(" User process data pages : 0x%016llx\n", pfn_user); + REPORT_MSG(" Free pages : 0x%016llx\n", pfn_free); + REPORT_MSG(" Hwpoison pages : 0x%016llx\n", pfn_hwpoison); + REPORT_MSG(" Remaining pages : 0x%016llx\n", + pfn_original - pfn_excluded); + REPORT_MSG(" (The number of pages is reduced to %lld%%.)\n", + shrinking); + REPORT_MSG("Memory Hole : 0x%016llx\n", pfn_memhole); + REPORT_MSG("--------------------------------------------------\n"); + REPORT_MSG("Total pages : 0x%016llx\n", info->max_mapnr); + REPORT_MSG("\n"); + 
REPORT_MSG("Cache hit: %lld, miss: %lld", cache_hit, cache_miss); + if (cache_hit + cache_miss) + REPORT_MSG(", hit rate: %.1f%%", + 100.0 * cache_hit / (cache_hit + cache_miss)); + REPORT_MSG("\n\n"); +} + +static void +print_mem_usage(void) +{ + mdf_pfn_t pfn_original, pfn_excluded, shrinking; + unsigned long long total_size; + + /* + * /proc/vmcore doesn't contain the memory hole area. + */ + pfn_original = info->max_mapnr - pfn_memhole; + + pfn_excluded = pfn_zero + pfn_cache + pfn_cache_private + + pfn_user + pfn_free + pfn_hwpoison; + shrinking = (pfn_original - pfn_excluded) * 100; + shrinking = shrinking / pfn_original; + total_size = info->page_size * pfn_original; + + MSG("\n"); + MSG("TYPE PAGES EXCLUDABLE DESCRIPTION\n"); + MSG("----------------------------------------------------------------------\n"); + + MSG("ZERO %-16llu yes Pages filled with zero\n", pfn_zero); + MSG("NON_PRI_CACHE %-16llu yes Cache pages without private flag\n", + pfn_cache); + MSG("PRI_CACHE %-16llu yes Cache pages with private flag\n", + pfn_cache_private); + MSG("USER %-16llu yes User process pages\n", pfn_user); + MSG("FREE %-16llu yes Free pages\n", pfn_free); + MSG("KERN_DATA %-16llu no Dumpable kernel data \n", + pfn_original - pfn_excluded); + + MSG("\n"); + + MSG("page size: %-16ld\n", info->page_size); + MSG("Total pages on system: %-16llu\n", pfn_original); + MSG("Total size on system: %-16llu Byte\n", total_size); +} + +int +writeout_dumpfile(void) +{ + int ret = FALSE; + struct cache_data cd_header, cd_page; + + info->flag_nospace = FALSE; + + if (!open_dump_file()) + return FALSE; + + if (info->flag_flatten) { + if (!write_start_flat_header()) + return FALSE; + } + if (!prepare_cache_data(&cd_header)) + return FALSE; + + if (!prepare_cache_data(&cd_page)) { + free_cache_data(&cd_header); + return FALSE; + } + if (info->flag_elf_dumpfile) { + if (!write_elf_header(&cd_header)) + goto out; + if (!write_elf_pages_cyclic(&cd_header, &cd_page)) + goto write_cache_enospc; 
+ if (!write_elf_eraseinfo(&cd_header)) + goto out; + } else { + if (!write_kdump_header()) + goto out; + if (!write_kdump_pages_and_bitmap_cyclic(&cd_header, &cd_page)) + goto out; + if (!write_kdump_eraseinfo(&cd_page)) + goto out; + } + if (info->flag_flatten) { + if (!write_end_flat_header()) + goto out; + } + + ret = TRUE; +write_cache_enospc: + if ((ret == FALSE) && info->flag_nospace && !info->flag_flatten) { + if (!write_cache_bufsz(&cd_header)) + ERRMSG("This dumpfile may lost some important data.\n"); + } +out: + free_cache_data(&cd_header); + free_cache_data(&cd_page); + + close_dump_file(); + + if ((ret == FALSE) && info->flag_nospace) + return NOSPACE; + else + return ret; +} + +/* + * calculate end_pfn of one dumpfile. + * try to make every output file have the same size. + * splitblock_table is used to reduce calculate time. + */ + +#define CURRENT_SPLITBLOCK_PFN_NUM (*cur_splitblock_num * splitblock->page_per_splitblock) +mdf_pfn_t +calculate_end_pfn_by_splitblock(mdf_pfn_t start_pfn, + int *cur_splitblock_num) +{ + if (start_pfn >= info->max_mapnr) + return info->max_mapnr; + + mdf_pfn_t end_pfn; + long long pfn_needed, offset; + char *splitblock_value_offset; + + pfn_needed = info->num_dumpable / info->num_dumpfile; + offset = *cur_splitblock_num * splitblock->entry_size; + splitblock_value_offset = splitblock->table + offset; + end_pfn = start_pfn; + + while (*cur_splitblock_num < splitblock->num && pfn_needed > 0) { + pfn_needed -= read_from_splitblock_table(splitblock_value_offset); + splitblock_value_offset += splitblock->entry_size; + ++*cur_splitblock_num; + } + + end_pfn = CURRENT_SPLITBLOCK_PFN_NUM; + if (end_pfn > info->max_mapnr) + end_pfn = info->max_mapnr; + + return end_pfn; +} + +/* + * calculate start_pfn and end_pfn in each output file. 
 + */
+/*
+ * Fill SPLITTING_START_PFN/SPLITTING_END_PFN for every output file so
+ * each gets roughly num_dumpable/num_dumpfile pages; the last file
+ * absorbs the remainder up to max_mapnr. Returns FALSE when there is
+ * nothing to split (num_dumpfile <= 1).
+ */
+static int setup_splitting(void)
+{
+ int i;
+ mdf_pfn_t start_pfn, end_pfn;
+ int cur_splitblock_num = 0;
+ start_pfn = end_pfn = 0;
+
+ if (info->num_dumpfile <= 1)
+ return FALSE;
+
+ for (i = 0; i < info->num_dumpfile - 1; i++) {
+ start_pfn = end_pfn;
+ end_pfn = calculate_end_pfn_by_splitblock(start_pfn,
+ &cur_splitblock_num);
+ SPLITTING_START_PFN(i) = start_pfn;
+ SPLITTING_END_PFN(i) = end_pfn;
+ }
+ SPLITTING_START_PFN(info->num_dumpfile - 1) = end_pfn;
+ SPLITTING_END_PFN(info->num_dumpfile - 1) = info->max_mapnr;
+
+ return TRUE;
+}
+
+/*
+ * This function is for creating split dumpfiles by multiple
+ * processes. Each child process should re-open a /proc/vmcore
+ * file, because it prevents each other from affectting the file
+ * offset due to read(2) call.
+ */
+int
+reopen_dump_memory()
+{
+ close_dump_memory();
+
+ if ((info->fd_memory = open(info->name_memory, O_RDONLY)) < 0) {
+ ERRMSG("Can't open the dump memory(%s). %s\n",
+ info->name_memory, strerror(errno));
+ return FALSE;
+ }
+ return TRUE;
+}
+
+/*
+ * Return the dump level to retry with at position @index in the
+ * user-supplied list, or -1 when the list is exhausted.
+ */
+int
+get_next_dump_level(int index)
+{
+ if (info->num_dump_level <= index)
+ return -1;
+
+ return info->array_dump_level[index];
+}
+
+/*
+ * Remove the output file(s) before a retry. A flattened dump goes to
+ * stdout, so there is nothing to unlink in that mode.
+ */
+int
+delete_dumpfile(void)
+{
+ int i;
+
+ if (info->flag_flatten)
+ return TRUE;
+
+ if (info->flag_split) {
+ for (i = 0; i < info->num_dumpfile; i++)
+ unlink(SPLITTING_DUMPFILE(i));
+ } else {
+ unlink(info->name_dumpfile);
+ }
+ return TRUE;
+}
+
+/*
+ * fork() one child per split dumpfile; each child writes its own pfn
+ * range and exits 0 on success, 1 on failure, 2 on ENOSPC.
+ */
+int
+writeout_multiple_dumpfiles(void)
+{
+ int i, status, ret = TRUE;
+ pid_t pid;
+ pid_t array_pid[info->num_dumpfile];
+
+ if (!setup_splitting())
+ return FALSE;
+
+ for (i = 0; i < info->num_dumpfile; i++) {
+ if ((pid = fork()) < 0) {
+ return FALSE;
+
+ } else if (pid == 0) { /* Child */
+ info->name_dumpfile = SPLITTING_DUMPFILE(i);
+ info->fd_bitmap = SPLITTING_FD_BITMAP(i);
+ info->split_start_pfn = SPLITTING_START_PFN(i);
+ info->split_end_pfn = SPLITTING_END_PFN(i);
+
+ if (!reopen_dump_memory())
+ exit(1);
+ if ((status = writeout_dumpfile()) == 
FALSE) + exit(1); + else if (status == NOSPACE) + exit(2); + exit(0); + } + array_pid[i] = pid; + } + for (i = 0; i < info->num_dumpfile; i++) { + waitpid(array_pid[i], &status, WUNTRACED); + if (!WIFEXITED(status) || WEXITSTATUS(status) == 1) { + ERRMSG("Child process(%d) finished incompletely.(%d)\n", + array_pid[i], status); + ret = FALSE; + } else if ((ret == TRUE) && (WEXITSTATUS(status) == 2)) + ret = NOSPACE; + } + return ret; +} + +void +update_dump_level(void) +{ + int new_level; + + new_level = info->dump_level | info->kh_memory->dump_level; + if (new_level != info->dump_level) { + info->dump_level = new_level; + MSG("dump_level is changed to %d, " \ + "because %s was created by dump_level(%d).", + new_level, info->name_memory, + info->kh_memory->dump_level); + } +} + +int +create_dumpfile(void) +{ + int num_retry, status; + + if (!open_files_for_creating_dumpfile()) + return FALSE; + + if (!info->flag_refiltering && !info->flag_sadump) { + if (!get_elf_info(info->fd_memory, info->name_memory)) + return FALSE; + } + + if (info->flag_refiltering) + update_dump_level(); + + if (!initial()) + return FALSE; + + if (!open_dump_bitmap()) + return FALSE; + + /* create an array of translations from pfn to vmemmap pages */ + if (info->flag_excludevm) { + if (find_vmemmap() == FAILED) { + ERRMSG("Can't find vmemmap pages\n"); + info->flag_excludevm = 0; + } + } + + print_vtop(); + + num_retry = 0; +retry: + if (info->flag_refiltering) + update_dump_level(); + + if ((info->name_filterconfig || info->name_eppic_config) + && !gather_filter_info()) + return FALSE; + + if (!create_dump_bitmap()) + return FALSE; + + if (info->flag_split) { + if ((status = writeout_multiple_dumpfiles()) == FALSE) + return FALSE; + } else { + if ((status = writeout_dumpfile()) == FALSE) + return FALSE; + } + if (status == NOSPACE) { + /* + * If specifying the other dump_level, makedumpfile tries + * to create a dumpfile with it again. 
+ */ + num_retry++; + if ((info->dump_level = get_next_dump_level(num_retry)) < 0) { + if (!info->flag_flatten) { + if (check_and_modify_headers()) + MSG("This is an incomplete dumpfile," + " but might analyzable.\n"); + } + + return FALSE; + } + MSG("Retry to create a dumpfile by dump_level(%d).\n", + info->dump_level); + if (!delete_dumpfile()) + return FALSE; + goto retry; + } + print_report(); + + clear_filter_info(); + if (!close_files_for_creating_dumpfile()) + return FALSE; + + return TRUE; +} + +int +__read_disk_dump_header(struct disk_dump_header *dh, char *filename) +{ + int fd, ret = FALSE; + + if ((fd = open(filename, O_RDONLY)) < 0) { + ERRMSG("Can't open a file(%s). %s\n", + filename, strerror(errno)); + return FALSE; + } + if (lseek(fd, 0x0, SEEK_SET) < 0) { + ERRMSG("Can't seek a file(%s). %s\n", + filename, strerror(errno)); + goto out; + } + if (read(fd, dh, sizeof(struct disk_dump_header)) + != sizeof(struct disk_dump_header)) { + ERRMSG("Can't read a file(%s). %s\n", + filename, strerror(errno)); + goto out; + } + ret = TRUE; +out: + close(fd); + + return ret; +} + +int +read_disk_dump_header(struct disk_dump_header *dh, char *filename) +{ + if (!__read_disk_dump_header(dh, filename)) + return FALSE; + + if (strncmp(dh->signature, KDUMP_SIGNATURE, strlen(KDUMP_SIGNATURE))) { + ERRMSG("%s is not the kdump-compressed format.\n", + filename); + return FALSE; + } + return TRUE; +} + +int +read_kdump_sub_header(struct kdump_sub_header *kh, char *filename) +{ + int fd, ret = FALSE; + struct disk_dump_header dh; + off_t offset; + + if (!read_disk_dump_header(&dh, filename)) + return FALSE; + + offset = DISKDUMP_HEADER_BLOCKS * dh.block_size; + + if ((fd = open(filename, O_RDONLY)) < 0) { + ERRMSG("Can't open a file(%s). %s\n", + filename, strerror(errno)); + return FALSE; + } + if (lseek(fd, offset, SEEK_SET) < 0) { + ERRMSG("Can't seek a file(%s). 
%s\n", + filename, strerror(errno)); + goto out; + } + if (read(fd, kh, sizeof(struct kdump_sub_header)) + != sizeof(struct kdump_sub_header)) { + ERRMSG("Can't read a file(%s). %s\n", + filename, strerror(errno)); + goto out; + } + ret = TRUE; +out: + close(fd); + + return ret; +} + +int +store_splitting_info(void) +{ + int i; + struct disk_dump_header dh, tmp_dh; + struct kdump_sub_header kh; + + for (i = 0; i < info->num_dumpfile; i++) { + if (!read_disk_dump_header(&tmp_dh, SPLITTING_DUMPFILE(i))) + return FALSE; + + if (i == 0) { + memcpy(&dh, &tmp_dh, sizeof(tmp_dh)); + if (!set_page_size(dh.block_size)) + return FALSE; + DEBUG_MSG("page_size : %ld\n", info->page_size); + } + + /* + * Check whether multiple dumpfiles are parts of + * the same /proc/vmcore. + */ + if (memcmp(&dh, &tmp_dh, sizeof(tmp_dh))) { + ERRMSG("Invalid dumpfile(%s).\n", + SPLITTING_DUMPFILE(i)); + return FALSE; + } + if (!read_kdump_sub_header(&kh, SPLITTING_DUMPFILE(i))) + return FALSE; + + if (i == 0) { + if (dh.header_version >= 6) + info->max_mapnr = kh.max_mapnr_64; + else + info->max_mapnr = dh.max_mapnr; + + DEBUG_MSG("max_mapnr : %llx\n", info->max_mapnr); + + info->dump_level = kh.dump_level; + DEBUG_MSG("dump_level : %d\n", info->dump_level); + } + + if (dh.header_version >= 6) { + SPLITTING_START_PFN(i) = kh.start_pfn_64; + SPLITTING_END_PFN(i) = kh.end_pfn_64; + } else { + SPLITTING_START_PFN(i) = kh.start_pfn; + SPLITTING_END_PFN(i) = kh.end_pfn; + } + SPLITTING_OFFSET_EI(i) = kh.offset_eraseinfo; + SPLITTING_SIZE_EI(i) = kh.size_eraseinfo; + } + return TRUE; +} + +void +sort_splitting_info(void) +{ + int i, j; + mdf_pfn_t start_pfn, end_pfn; + char *name_dumpfile; + + /* + * Sort splitting_info by start_pfn. 
+ */ + for (i = 0; i < (info->num_dumpfile - 1); i++) { + for (j = i; j < info->num_dumpfile; j++) { + if (SPLITTING_START_PFN(i) < SPLITTING_START_PFN(j)) + continue; + start_pfn = SPLITTING_START_PFN(i); + end_pfn = SPLITTING_END_PFN(i); + name_dumpfile = SPLITTING_DUMPFILE(i); + + SPLITTING_START_PFN(i) = SPLITTING_START_PFN(j); + SPLITTING_END_PFN(i) = SPLITTING_END_PFN(j); + SPLITTING_DUMPFILE(i) = SPLITTING_DUMPFILE(j); + + SPLITTING_START_PFN(j) = start_pfn; + SPLITTING_END_PFN(j) = end_pfn; + SPLITTING_DUMPFILE(j) = name_dumpfile; + } + } + + DEBUG_MSG("num_dumpfile : %d\n", info->num_dumpfile); + for (i = 0; i < info->num_dumpfile; i++) { + DEBUG_MSG("dumpfile (%s)\n", SPLITTING_DUMPFILE(i)); + DEBUG_MSG(" start_pfn : %llx\n", SPLITTING_START_PFN(i)); + DEBUG_MSG(" end_pfn : %llx\n", SPLITTING_END_PFN(i)); + } +} + +int +check_splitting_info(void) +{ + int i; + mdf_pfn_t end_pfn; + + /* + * Check whether there are not lack of /proc/vmcore. + */ + if (SPLITTING_START_PFN(0) != 0) { + ERRMSG("There is not dumpfile corresponding to pfn 0x%x - 0x%llx.\n", + 0x0, SPLITTING_START_PFN(0)); + return FALSE; + } + end_pfn = SPLITTING_END_PFN(0); + + for (i = 1; i < info->num_dumpfile; i++) { + if (end_pfn != SPLITTING_START_PFN(i)) { + ERRMSG("There is not dumpfile corresponding to pfn 0x%llx - 0x%llx.\n", + end_pfn, SPLITTING_START_PFN(i)); + return FALSE; + } + end_pfn = SPLITTING_END_PFN(i); + } + if (end_pfn != info->max_mapnr) { + ERRMSG("There is not dumpfile corresponding to pfn 0x%llx - 0x%llx.\n", + end_pfn, info->max_mapnr); + return FALSE; + } + + return TRUE; +} + +int +get_splitting_info(void) +{ + if (!store_splitting_info()) + return FALSE; + + sort_splitting_info(); + + if (!check_splitting_info()) + return FALSE; + + if (!get_kdump_compressed_header_info(SPLITTING_DUMPFILE(0))) + return FALSE; + + return TRUE; +} + +int +copy_same_data(int src_fd, int dst_fd, off_t offset, unsigned long size) +{ + int ret = FALSE; + char *buf = NULL; + + if ((buf = 
malloc(size)) == NULL) { + ERRMSG("Can't allocate memory.\n"); + return FALSE; + } + if (lseek(src_fd, offset, SEEK_SET) < 0) { + ERRMSG("Can't seek a source file. %s\n", strerror(errno)); + goto out; + } + if (read(src_fd, buf, size) != size) { + ERRMSG("Can't read a source file. %s\n", strerror(errno)); + goto out; + } + if (lseek(dst_fd, offset, SEEK_SET) < 0) { + ERRMSG("Can't seek a destination file. %s\n", strerror(errno)); + goto out; + } + if (write(dst_fd, buf, size) != size) { + ERRMSG("Can't write a destination file. %s\n", strerror(errno)); + goto out; + } + ret = TRUE; +out: + free(buf); + return ret; +} + +int +reassemble_kdump_header(void) +{ + int fd = -1, ret = FALSE; + off_t offset; + unsigned long size; + struct disk_dump_header dh; + struct kdump_sub_header kh; + char *buf_bitmap = NULL; + ssize_t status, read_size, written_size; + + /* + * Write common header. + */ + int i; + for ( i = 0; i < info->num_dumpfile; i++){ + if (!read_disk_dump_header(&dh, SPLITTING_DUMPFILE(i))) + return FALSE; + int status = dh.status & DUMP_DH_COMPRESSED_INCOMPLETE; + if (status) + break; + } + + if (lseek(info->fd_dumpfile, 0x0, SEEK_SET) < 0) { + ERRMSG("Can't seek a file(%s). %s\n", + info->name_dumpfile, strerror(errno)); + return FALSE; + } + if (write(info->fd_dumpfile, &dh, sizeof(dh)) != sizeof(dh)) { + ERRMSG("Can't write a file(%s). %s\n", + info->name_dumpfile, strerror(errno)); + return FALSE; + } + + /* + * Write sub header. + */ + if (!read_kdump_sub_header(&kh, SPLITTING_DUMPFILE(0))) + return FALSE; + + kh.split = 0; + kh.start_pfn = 0; + kh.end_pfn = 0; + kh.start_pfn_64 = 0; + kh.end_pfn_64 = 0; + + if (lseek(info->fd_dumpfile, info->page_size, SEEK_SET) < 0) { + ERRMSG("Can't seek a file(%s). %s\n", + info->name_dumpfile, strerror(errno)); + return FALSE; + } + if (write(info->fd_dumpfile, &kh, sizeof(kh)) != sizeof(kh)) { + ERRMSG("Can't write a file(%s). 
%s\n", + info->name_dumpfile, strerror(errno)); + return FALSE; + } + memcpy(&info->sub_header, &kh, sizeof(kh)); + + if ((fd = open(SPLITTING_DUMPFILE(0), O_RDONLY)) < 0) { + ERRMSG("Can't open a file(%s). %s\n", + SPLITTING_DUMPFILE(0), strerror(errno)); + return FALSE; + } + if (has_pt_note()) { + get_pt_note(&offset, &size); + if (!copy_same_data(fd, info->fd_dumpfile, offset, size)) { + ERRMSG("Can't copy pt_note data to %s.\n", + info->name_dumpfile); + goto out; + } + } + if (has_vmcoreinfo()) { + get_vmcoreinfo(&offset, &size); + if (!copy_same_data(fd, info->fd_dumpfile, offset, size)) { + ERRMSG("Can't copy vmcoreinfo data to %s.\n", + info->name_dumpfile); + goto out; + } + } + + /* + * Write dump bitmap to both a dumpfile and a bitmap file. + */ + offset = (DISKDUMP_HEADER_BLOCKS + dh.sub_hdr_size) * dh.block_size; + info->len_bitmap = dh.bitmap_blocks * dh.block_size; + if ((buf_bitmap = malloc(info->len_bitmap)) == NULL) { + ERRMSG("Can't allocate memory for bitmap.\n"); + goto out; + } + if (lseek(fd, offset, SEEK_SET) < 0) { + ERRMSG("Can't seek a file(%s). %s\n", + SPLITTING_DUMPFILE(0), strerror(errno)); + goto out; + } + read_size = 0; + while (read_size < info->len_bitmap) { + status = read(fd, buf_bitmap + read_size, info->len_bitmap + - read_size); + if (status < 0) { + ERRMSG("Can't read a file(%s). %s\n", + SPLITTING_DUMPFILE(0), strerror(errno)); + goto out; + } + read_size += status; + } + + if (lseek(info->fd_dumpfile, offset, SEEK_SET) < 0) { + ERRMSG("Can't seek a file(%s). %s\n", + info->name_dumpfile, strerror(errno)); + goto out; + } + written_size = 0; + while (written_size < info->len_bitmap) { + status = write(info->fd_dumpfile, buf_bitmap + written_size, + info->len_bitmap - written_size); + if (status < 0) { + ERRMSG("Can't write a file(%s). %s\n", + info->name_dumpfile, strerror(errno)); + goto out; + } + written_size += status; + } + + if (lseek(info->fd_bitmap, 0x0, SEEK_SET) < 0) { + ERRMSG("Can't seek a file(%s). 
%s\n", + info->name_bitmap, strerror(errno)); + goto out; + } + written_size = 0; + while (written_size < info->len_bitmap) { + status = write(info->fd_bitmap, buf_bitmap + written_size, + info->len_bitmap - written_size); + if (status < 0) { + ERRMSG("Can't write a file(%s). %s\n", + info->name_bitmap, strerror(errno)); + goto out; + } + written_size += status; + } + + ret = TRUE; +out: + if (fd >= 0) + close(fd); + free(buf_bitmap); + + return ret; +} + +int +reassemble_kdump_pages(void) +{ + int i, fd = -1, ret = FALSE; + off_t offset_first_ph, offset_ph_org, offset_eraseinfo; + off_t offset_data_new, offset_zero_page = 0; + mdf_pfn_t pfn, start_pfn, end_pfn; + mdf_pfn_t num_dumpable; + unsigned long size_eraseinfo; + struct disk_dump_header dh; + struct page_desc pd, pd_zero; + struct cache_data cd_pd, cd_data; + struct timeval tv_start; + char *data = NULL; + unsigned long data_buf_size = info->page_size; + + if (!prepare_bitmap2_buffer()) + return FALSE; + + if (!read_disk_dump_header(&dh, SPLITTING_DUMPFILE(0))) + return FALSE; + + if (!prepare_cache_data(&cd_pd)) + return FALSE; + + if (!prepare_cache_data(&cd_data)) { + free_cache_data(&cd_pd); + return FALSE; + } + if ((data = malloc(data_buf_size)) == NULL) { + ERRMSG("Can't allocate memory for page data.\n"); + free_cache_data(&cd_pd); + free_cache_data(&cd_data); + return FALSE; + } + num_dumpable = get_num_dumpable(); + num_dumped = 0; + + offset_first_ph + = (DISKDUMP_HEADER_BLOCKS + dh.sub_hdr_size + dh.bitmap_blocks) + * dh.block_size; + cd_pd.offset = offset_first_ph; + offset_data_new = offset_first_ph + sizeof(page_desc_t) * num_dumpable; + cd_data.offset = offset_data_new; + + /* + * Write page header of zero-filled page. + */ + gettimeofday(&tv_start, NULL); + if (info->dump_level & DL_EXCLUDE_ZERO) { + /* + * makedumpfile outputs the data of zero-filled page at first + * if excluding zero-filled page, so the offset of first data + * is for zero-filled page in all dumpfiles. 
+ */ + offset_zero_page = offset_data_new; + + pd_zero.size = info->page_size; + pd_zero.flags = 0; + pd_zero.offset = offset_data_new; + pd_zero.page_flags = 0; + memset(data, 0, pd_zero.size); + if (!write_cache(&cd_data, data, pd_zero.size)) + goto out; + offset_data_new += pd_zero.size; + } + + for (i = 0; i < info->num_dumpfile; i++) { + if ((fd = open(SPLITTING_DUMPFILE(i), O_RDONLY)) < 0) { + ERRMSG("Can't open a file(%s). %s\n", + SPLITTING_DUMPFILE(i), strerror(errno)); + goto out; + } + start_pfn = SPLITTING_START_PFN(i); + end_pfn = SPLITTING_END_PFN(i); + + offset_ph_org = offset_first_ph; + for (pfn = start_pfn; pfn < end_pfn; pfn++) { + if (!is_dumpable(info->bitmap2, pfn, NULL)) + continue; + + num_dumped++; + + print_progress(PROGRESS_COPY, num_dumped, num_dumpable, &tv_start); + + if (lseek(fd, offset_ph_org, SEEK_SET) < 0) { + ERRMSG("Can't seek a file(%s). %s\n", + SPLITTING_DUMPFILE(i), strerror(errno)); + goto out; + } + if (read(fd, &pd, sizeof(pd)) != sizeof(pd)) { + ERRMSG("Can't read a file(%s). %s\n", + SPLITTING_DUMPFILE(i), strerror(errno)); + goto out; + } + if (lseek(fd, pd.offset, SEEK_SET) < 0) { + ERRMSG("Can't seek a file(%s). %s\n", + SPLITTING_DUMPFILE(i), strerror(errno)); + goto out; + } + if (read(fd, data, pd.size) != pd.size) { + ERRMSG("Can't read a file(%s). %s\n", + SPLITTING_DUMPFILE(i), strerror(errno)); + goto out; + } + if ((info->dump_level & DL_EXCLUDE_ZERO) + && (pd.offset == offset_zero_page)) { + /* + * Handle the data of zero-filled page. 
+ */ + if (!write_cache(&cd_pd, &pd_zero, + sizeof(pd_zero))) + goto out; + offset_ph_org += sizeof(pd); + continue; + } + pd.offset = offset_data_new; + if (!write_cache(&cd_pd, &pd, sizeof(pd))) + goto out; + offset_ph_org += sizeof(pd); + + if (!write_cache(&cd_data, data, pd.size)) + goto out; + + offset_data_new += pd.size; + } + close(fd); + fd = -1; + } + if (!write_cache_bufsz(&cd_pd)) + goto out; + if (!write_cache_bufsz(&cd_data)) + goto out; + + offset_eraseinfo = cd_data.offset; + size_eraseinfo = 0; + /* Copy eraseinfo from split dumpfiles to o/p dumpfile */ + for (i = 0; i < info->num_dumpfile; i++) { + if (!SPLITTING_SIZE_EI(i)) + continue; + + if (SPLITTING_SIZE_EI(i) > data_buf_size) { + data_buf_size = SPLITTING_SIZE_EI(i); + if ((data = realloc(data, data_buf_size)) == NULL) { + ERRMSG("Can't allocate memory for eraseinfo" + " data.\n"); + goto out; + } + } + if ((fd = open(SPLITTING_DUMPFILE(i), O_RDONLY)) < 0) { + ERRMSG("Can't open a file(%s). %s\n", + SPLITTING_DUMPFILE(i), strerror(errno)); + goto out; + } + if (lseek(fd, SPLITTING_OFFSET_EI(i), SEEK_SET) < 0) { + ERRMSG("Can't seek a file(%s). %s\n", + SPLITTING_DUMPFILE(i), strerror(errno)); + goto out; + } + if (read(fd, data, SPLITTING_SIZE_EI(i)) != + SPLITTING_SIZE_EI(i)) { + ERRMSG("Can't read a file(%s). 
%s\n", + SPLITTING_DUMPFILE(i), strerror(errno)); + goto out; + } + if (!write_cache(&cd_data, data, SPLITTING_SIZE_EI(i))) + goto out; + size_eraseinfo += SPLITTING_SIZE_EI(i); + + close(fd); + fd = -1; + } + if (size_eraseinfo) { + if (!write_cache_bufsz(&cd_data)) + goto out; + + if (!update_eraseinfo_of_sub_header(offset_eraseinfo, + size_eraseinfo)) + goto out; + } + print_progress(PROGRESS_COPY, num_dumpable, num_dumpable, &tv_start); + print_execution_time(PROGRESS_COPY, &tv_start); + + ret = TRUE; +out: + free_cache_data(&cd_pd); + free_cache_data(&cd_data); + free_bitmap2_buffer(); + + if (data) + free(data); + if (fd >= 0) + close(fd); + + return ret; +} + +int +reassemble_dumpfile(void) +{ + if (!get_splitting_info()) + return FALSE; + + if (!open_dump_bitmap()) + return FALSE; + + if (!open_dump_file()) + return FALSE; + + if (!reassemble_kdump_header()) + return FALSE; + + if (!reassemble_kdump_pages()) + return FALSE; + + close_dump_file(); + close_dump_bitmap(); + + return TRUE; +} + +int +check_param_for_generating_vmcoreinfo(int argc, char *argv[]) +{ + if (argc != optind) + return FALSE; + + if (info->flag_compress || info->dump_level + || info->flag_elf_dumpfile || info->flag_read_vmcoreinfo + || info->flag_flatten || info->flag_rearrange + || info->flag_exclude_xen_dom + || (!info->name_vmlinux && !info->name_xen_syms)) + + return FALSE; + + return TRUE; +} + +/* + * Parameters for creating dumpfile from the dump data + * of flattened format by rearranging the dump data. 
+ */ +int +check_param_for_rearranging_dumpdata(int argc, char *argv[]) +{ + if (argc != optind + 1) + return FALSE; + + if (info->flag_compress || info->dump_level + || info->flag_elf_dumpfile || info->flag_read_vmcoreinfo + || info->name_vmlinux || info->name_xen_syms + || info->flag_flatten || info->flag_generate_vmcoreinfo + || info->flag_exclude_xen_dom) + return FALSE; + + info->name_dumpfile = argv[optind]; + return TRUE; +} + +/* + * Parameters for reassembling multiple dumpfiles into one dumpfile. + */ +int +check_param_for_reassembling_dumpfile(int argc, char *argv[]) +{ + int i; + + info->num_dumpfile = argc - optind - 1; + info->name_dumpfile = argv[argc - 1]; + + DEBUG_MSG("num_dumpfile : %d\n", info->num_dumpfile); + + if (info->flag_compress || info->dump_level + || info->flag_elf_dumpfile || info->flag_read_vmcoreinfo + || info->name_vmlinux || info->name_xen_syms + || info->flag_flatten || info->flag_generate_vmcoreinfo + || info->flag_exclude_xen_dom || info->flag_split) + return FALSE; + + if ((info->splitting_info + = malloc(sizeof(splitting_info_t) * info->num_dumpfile)) + == NULL) { + MSG("Can't allocate memory for splitting_info.\n"); + return FALSE; + } + for (i = 0; i < info->num_dumpfile; i++) + SPLITTING_DUMPFILE(i) = argv[optind + i]; + + return TRUE; +} + +/* + * Check parameters to create the dump file. 
+ */ +int +check_param_for_creating_dumpfile(int argc, char *argv[]) +{ + int i; + + if (info->flag_generate_vmcoreinfo || info->flag_rearrange) + return FALSE; + + if ((message_level < MIN_MSG_LEVEL) + || (MAX_MSG_LEVEL < message_level)) { + message_level = DEFAULT_MSG_LEVEL; + MSG("Message_level is invalid.\n"); + return FALSE; + } + if ((info->flag_compress && info->flag_elf_dumpfile) + || (info->flag_read_vmcoreinfo && info->name_vmlinux) + || (info->flag_read_vmcoreinfo && info->name_xen_syms)) + return FALSE; + + if (info->flag_flatten && info->flag_split) + return FALSE; + + if (info->name_filterconfig && !info->name_vmlinux) + return FALSE; + + if (info->flag_sadump_diskset && !sadump_is_supported_arch()) + return FALSE; + + if (info->num_threads) { + if (info->flag_split) { + MSG("--num-threads cannot used with --split.\n"); + return FALSE; + } + + if (info->flag_elf_dumpfile) { + MSG("--num-threads cannot used with ELF format.\n"); + return FALSE; + } + } + + if (info->flag_partial_dmesg && !info->flag_dmesg) + return FALSE; + + if ((argc == optind + 2) && !info->flag_flatten + && !info->flag_split + && !info->flag_sadump_diskset) { + /* + * Parameters for creating the dumpfile from vmcore. + */ + info->name_memory = argv[optind]; + info->name_dumpfile = argv[optind+1]; + + } else if (info->flag_split && (info->flag_sadump_diskset + ? (argc >= optind + 2) + : (argc > optind + 2))) { + int num_vmcore; + + /* + * Parameters for creating multiple dumpfiles from vmcore. 
+ */ + if (info->flag_sadump_diskset) { + num_vmcore = 0; + info->name_memory = sadump_head_disk_name_memory(); + } else { + num_vmcore = 1; + info->name_memory = argv[optind]; + } + info->num_dumpfile = argc - optind - num_vmcore; + + if (info->flag_elf_dumpfile) { + MSG("Options for splitting dumpfile cannot be used with Elf format.\n"); + return FALSE; + } + if ((info->splitting_info + = malloc(sizeof(splitting_info_t) * info->num_dumpfile)) + == NULL) { + MSG("Can't allocate memory for splitting_info.\n"); + return FALSE; + } + for (i = 0; i < info->num_dumpfile; i++) + SPLITTING_DUMPFILE(i) = argv[optind + num_vmcore + i]; + + } else if ((argc == optind + 1) && !info->flag_split + && info->flag_sadump_diskset) { + info->name_dumpfile = argv[optind]; + info->name_memory = sadump_head_disk_name_memory(); + + DEBUG_MSG("name_dumpfile: %s\n", info->name_dumpfile); + DEBUG_MSG("name_memory: %s\n", info->name_memory); + + } else if ((argc == optind + 1) && info->flag_flatten) { + /* + * Parameters for outputting the dump data of the + * flattened format to STDOUT. + */ + info->name_memory = argv[optind]; + + } else if ((argc == optind + 1) && info->flag_mem_usage) { + /* + * Parameter for showing the page number of memory + * in different use from. 
+ */ + info->name_memory = argv[optind]; + + } else + return FALSE; + + if (info->num_threads) { + if ((info->parallel_info = + malloc(sizeof(parallel_info_t) * info->num_threads)) + == NULL) { + MSG("Can't allocate memory for parallel_info.\n"); + return FALSE; + } + + memset(info->parallel_info, 0, sizeof(parallel_info_t) + * info->num_threads); + } + + return TRUE; +} + +int +parse_dump_level(char *str_dump_level) +{ + int i, ret = FALSE; + char *buf, *ptr; + + if (!(buf = strdup(str_dump_level))) { + MSG("Can't duplicate strings(%s).\n", str_dump_level); + return FALSE; + } + info->max_dump_level = 0; + info->num_dump_level = 0; + ptr = buf; + while(TRUE) { + ptr = strtok(ptr, ","); + if (!ptr) + break; + + i = atoi(ptr); + if ((i < MIN_DUMP_LEVEL) || (MAX_DUMP_LEVEL < i)) { + MSG("Dump_level(%d) is invalid.\n", i); + goto out; + } + if (NUM_ARRAY_DUMP_LEVEL <= info->num_dump_level) { + MSG("Dump_level is invalid.\n"); + goto out; + } + if (info->max_dump_level < i) + info->max_dump_level = i; + if (info->num_dump_level == 0) + info->dump_level = i; + info->array_dump_level[info->num_dump_level] = i; + info->num_dump_level++; + ptr = NULL; + } + ret = TRUE; +out: + free(buf); + + return ret; +} + +/* + * Get the amount of free memory from /proc/meminfo. + */ +unsigned long long +get_free_memory_size(void) { + char buf[BUFSIZE_FGETS]; + char unit[4]; + unsigned long long free_size = 0; + char *name_meminfo = "/proc/meminfo"; + FILE *file_meminfo; + + if ((file_meminfo = fopen(name_meminfo, "r")) == NULL) { + ERRMSG("Can't open the %s. %s\n", name_meminfo, strerror(errno)); + return FALSE; + } + + while (fgets(buf, BUFSIZE_FGETS, file_meminfo) != NULL) { + if (sscanf(buf, "MemFree: %llu %s", &free_size, unit) == 2) { + if (strcmp(unit, "kB") == 0) { + free_size *= 1024; + goto out; + } + } + } + + ERRMSG("Can't get free memory size.\n"); + free_size = 0; +out: + if (fclose(file_meminfo) < 0) + ERRMSG("Can't close the %s. 
%s\n", name_meminfo, strerror(errno)); + + return free_size; +} + +/* + * Choose the less value of the three below as the size of cyclic buffer. + * - the size enough for storing the 1st or 2nd bitmap for the whole of vmcore + * - 4MB as sufficient value + * - 60% of free memory as safety limit + */ +int +calculate_cyclic_buffer_size(void) { + unsigned long long limit_size, bitmap_size; + const unsigned long long maximum_size = 4 * 1024 * 1024; + + if (info->max_mapnr <= 0) { + ERRMSG("Invalid max_mapnr(%llu).\n", info->max_mapnr); + return FALSE; + } + + /* + * At least, we should keep the size of cyclic buffer within 60% of + * free memory for safety. + */ + limit_size = get_free_memory_size() * 0.6; + + /* + * Recalculate the limit_size according to num_threads. + * And reset num_threads if there is not enough memory. + */ + if (info->num_threads > 0) { + if (limit_size <= maximum_size) { + MSG("There isn't enough memory for multi-threads.\n"); + info->num_threads = 0; + } + else if ((limit_size - maximum_size) / info->num_threads < THREAD_REGION) { + MSG("There isn't enough memory for %d threads.\n", info->num_threads); + info->num_threads = (limit_size - maximum_size) / THREAD_REGION; + MSG("--num_threads is set to %d.\n", info->num_threads); + + limit_size = limit_size - THREAD_REGION * info->num_threads; + } + } + + /* Try to keep both 1st and 2nd bitmap at the same time. */ + bitmap_size = info->max_mapnr * 2 / BITPERBYTE; + + /* if --split was specified cyclic buffer allocated per dump file */ + if (info->num_dumpfile > 1) + bitmap_size /= info->num_dumpfile; + + /* 4MB will be enough for performance according to benchmarks. */ + info->bufsize_cyclic = MIN(MIN(limit_size, maximum_size), bitmap_size); + + return TRUE; +} + + + +/* #define CRASH_RESERVED_MEM_NR 8 */ +struct memory_range crash_reserved_mem[CRASH_RESERVED_MEM_NR]; +int crash_reserved_mem_nr; + +/* + * iomem_for_each_line() + * + * Iterate over each line in the file returned by proc_iomem(). 
If match is + * NULL or if the line matches with our match-pattern then call the + * callback if non-NULL. + * + * Return the number of lines matched. + */ +int iomem_for_each_line(char *match, + int (*callback)(void *data, + int nr, + char *str, + unsigned long base, + unsigned long length), + void *data) +{ + const char iomem[] = "/proc/iomem"; + char line[BUFSIZE_FGETS]; + FILE *fp; + unsigned long long start, end, size; + char *str; + int consumed; + int count; + int nr = 0; + + fp = fopen(iomem, "r"); + if (!fp) { + ERRMSG("Cannot open %s\n", iomem); + return nr; + } + + while (fgets(line, sizeof(line), fp) != 0) { + count = sscanf(line, "%Lx-%Lx : %n", &start, &end, &consumed); + if (count != 2) + continue; + str = line + consumed; + size = end - start + 1; + if (!match || memcmp(str, match, strlen(match)) == 0) { + if (callback + && callback(data, nr, str, start, size) < 0) { + break; + } + nr++; + } + } + + fclose(fp); + + return nr; +} + +static int crashkernel_mem_callback(void *data, int nr, + char *str, + unsigned long base, + unsigned long length) +{ + if (nr >= CRASH_RESERVED_MEM_NR) + return 1; + + crash_reserved_mem[nr].start = base; + crash_reserved_mem[nr].end = base + length - 1; + return 0; +} + +int is_crashkernel_mem_reserved(void) +{ + int ret; + + if (arch_crashkernel_mem_size()) + return TRUE; + + ret = iomem_for_each_line("Crash kernel\n", + crashkernel_mem_callback, NULL); + crash_reserved_mem_nr = ret; + + return !!crash_reserved_mem_nr; +} + +static int get_page_offset(void) +{ + if (!populate_kernel_version()) + return FALSE; + + get_versiondep_info(); + + return TRUE; +} + +/* Returns the physical address of start of crash notes buffer for a kernel. 
*/ +static int get_sys_kernel_vmcoreinfo(uint64_t *addr, uint64_t *len) +{ + char line[BUFSIZE_FGETS]; + int count; + FILE *fp; + unsigned long long temp, temp2; + + *addr = 0; + *len = 0; + + if (!(fp = fopen("/sys/kernel/vmcoreinfo", "r"))) + return FALSE; + + if (!fgets(line, sizeof(line), fp)) { + ERRMSG("Cannot parse %s: %s, fgets failed.\n", + "/sys/kernel/vmcoreinfo", strerror(errno)); + return FALSE; + } + count = sscanf(line, "%Lx %Lx", &temp, &temp2); + if (count != 2) { + ERRMSG("Cannot parse %s: %s, sscanf failed.\n", + "/sys/kernel/vmcoreinfo", strerror(errno)); + return FALSE; + } + + *addr = (uint64_t) temp; + *len = (uint64_t) temp2; + + fclose(fp); + return TRUE; +} + +int show_mem_usage(void) +{ + uint64_t vmcoreinfo_addr, vmcoreinfo_len; + struct cycle cycle = {0}; + + if (!is_crashkernel_mem_reserved()) { + ERRMSG("No memory is reserved for crashkernel!\n"); + return FALSE; + } + + info->dump_level = MAX_DUMP_LEVEL; + + if (!open_files_for_creating_dumpfile()) + return FALSE; + + if (!get_elf_loads(info->fd_memory, info->name_memory)) + return FALSE; + + if (!get_page_offset()) + return FALSE; + + if (!get_sys_kernel_vmcoreinfo(&vmcoreinfo_addr, &vmcoreinfo_len)) + return FALSE; + + if (!set_kcore_vmcoreinfo(vmcoreinfo_addr, vmcoreinfo_len)) + return FALSE; + + if (!initial()) + return FALSE; + + if (!open_dump_bitmap()) + return FALSE; + + if (!prepare_bitmap_buffer()) + return FALSE; + + pfn_memhole = info->max_mapnr; + first_cycle(0, info->max_mapnr, &cycle); + if (!create_1st_bitmap(&cycle)) + return FALSE; + if (!create_2nd_bitmap(&cycle)) + return FALSE; + + info->num_dumpable = get_num_dumpable_cyclic(); + + free_bitmap_buffer(); + + print_mem_usage(); + + if (!close_files_for_creating_dumpfile()) + return FALSE; + + return TRUE; +} + + +static struct option longopts[] = { + {"split", no_argument, NULL, OPT_SPLIT}, + {"reassemble", no_argument, NULL, OPT_REASSEMBLE}, + {"xen-syms", required_argument, NULL, OPT_XEN_SYMS}, + 
{"xen-vmcoreinfo", required_argument, NULL, OPT_XEN_VMCOREINFO}, + {"xen_phys_start", required_argument, NULL, OPT_XEN_PHYS_START}, + {"message-level", required_argument, NULL, OPT_MESSAGE_LEVEL}, + {"vtop", required_argument, NULL, OPT_VTOP}, + {"dump-dmesg", no_argument, NULL, OPT_DUMP_DMESG}, + {"partial-dmesg", no_argument, NULL, OPT_PARTIAL_DMESG}, + {"config", required_argument, NULL, OPT_CONFIG}, + {"help", no_argument, NULL, OPT_HELP}, + {"diskset", required_argument, NULL, OPT_DISKSET}, + {"cyclic-buffer", required_argument, NULL, OPT_CYCLIC_BUFFER}, + {"eppic", required_argument, NULL, OPT_EPPIC}, + {"non-mmap", no_argument, NULL, OPT_NON_MMAP}, + {"mem-usage", no_argument, NULL, OPT_MEM_USAGE}, + {"splitblock-size", required_argument, NULL, OPT_SPLITBLOCK_SIZE}, + {"work-dir", required_argument, NULL, OPT_WORKING_DIR}, + {"num-threads", required_argument, NULL, OPT_NUM_THREADS}, + {0, 0, 0, 0} +}; + +int +main(int argc, char *argv[]) +{ + int i, opt, flag_debug = FALSE; + + if ((info = calloc(1, sizeof(struct DumpInfo))) == NULL) { + ERRMSG("Can't allocate memory for the pagedesc cache. %s.\n", + strerror(errno)); + goto out; + } + if ((info->dump_header = calloc(1, sizeof(struct disk_dump_header))) + == NULL) { + ERRMSG("Can't allocate memory for the dump header. %s\n", + strerror(errno)); + goto out; + } + info->file_vmcoreinfo = NULL; + info->fd_vmlinux = -1; + info->fd_xen_syms = -1; + info->fd_memory = -1; + info->fd_dumpfile = -1; + info->fd_bitmap = -1; + info->kaslr_offset = 0; + initialize_tables(); + + /* + * By default, makedumpfile assumes that multi-cycle processing is + * necessary to work in constant memory space. + */ + info->flag_cyclic = TRUE; + + /* + * By default, makedumpfile try to use mmap(2) to read /proc/vmcore. 
+ */ + info->flag_usemmap = MMAP_TRY; + + info->block_order = DEFAULT_ORDER; + message_level = DEFAULT_MSG_LEVEL; + while ((opt = getopt_long(argc, argv, "b:cDd:eEFfg:hi:lpRvXx:", longopts, + NULL)) != -1) { + switch (opt) { + case OPT_BLOCK_ORDER: + info->block_order = atoi(optarg); + break; + case OPT_CONFIG: + info->name_filterconfig = optarg; + break; + case OPT_COMPRESS_ZLIB: + info->flag_compress = DUMP_DH_COMPRESSED_ZLIB; + break; + case OPT_DEBUG: + flag_debug = TRUE; + break; + case OPT_DUMP_LEVEL: + if (!parse_dump_level(optarg)) + goto out; + break; + case OPT_ELF_DUMPFILE: + info->flag_elf_dumpfile = 1; + break; + case OPT_FLATTEN: + info->flag_flatten = 1; + /* + * All messages are output to STDERR because STDOUT is + * used for outputting dump data. + */ + flag_strerr_message = TRUE; + break; + case OPT_FORCE: + info->flag_force = 1; + break; + case OPT_GENERATE_VMCOREINFO: + info->flag_generate_vmcoreinfo = 1; + info->name_vmcoreinfo = optarg; + break; + case OPT_HELP: + info->flag_show_usage = 1; + break; + case OPT_READ_VMCOREINFO: + info->flag_read_vmcoreinfo = 1; + info->name_vmcoreinfo = optarg; + break; + case OPT_EXCLUDE_UNUSED_VM: + info->flag_excludevm = 1; /* exclude unused vmemmap pages */ + info->flag_cyclic = FALSE; /* force create_2nd_bitmap */ + break; + case OPT_DISKSET: + if (!sadump_add_diskset_info(optarg)) + goto out; + info->flag_sadump_diskset = 1; + break; + case OPT_COMPRESS_LZO: + info->flag_compress = DUMP_DH_COMPRESSED_LZO; + break; + case OPT_MESSAGE_LEVEL: + message_level = atoi(optarg); + break; + case OPT_DUMP_DMESG: + info->flag_dmesg = 1; + break; + case OPT_PARTIAL_DMESG: + info->flag_partial_dmesg = 1; + break; + case OPT_MEM_USAGE: + info->flag_mem_usage = 1; + break; + case OPT_COMPRESS_SNAPPY: + info->flag_compress = DUMP_DH_COMPRESSED_SNAPPY; + break; + case OPT_XEN_PHYS_START: + info->xen_phys_start = strtoul(optarg, NULL, 0); + break; + case OPT_REARRANGE: + info->flag_rearrange = 1; + break; + case OPT_SPLIT: 
+ info->flag_split = 1; + break; + case OPT_EPPIC: + info->name_eppic_config = optarg; + break; + case OPT_REASSEMBLE: + info->flag_reassemble = 1; + break; + case OPT_VTOP: + info->vaddr_for_vtop = strtoul(optarg, NULL, 0); + break; + case OPT_VERSION: + info->flag_show_version = 1; + break; + case OPT_EXCLUDE_XEN_DOM: + info->flag_exclude_xen_dom = 1; + break; + case OPT_VMLINUX: + info->name_vmlinux = optarg; + break; + case OPT_XEN_SYMS: + info->name_xen_syms = optarg; + break; + case OPT_NON_MMAP: + info->flag_usemmap = MMAP_DISABLE; + break; + case OPT_XEN_VMCOREINFO: + info->flag_read_vmcoreinfo = 1; + info->name_vmcoreinfo = optarg; + break; + case OPT_CYCLIC_BUFFER: + info->bufsize_cyclic = atoi(optarg); + break; + case OPT_SPLITBLOCK_SIZE: + info->splitblock_size = atoi(optarg); + break; + case OPT_WORKING_DIR: + info->working_dir = optarg; + break; + case OPT_NUM_THREADS: + info->num_threads = MAX(atoi(optarg), 0); + break; + case '?': + MSG("Commandline parameter is invalid.\n"); + MSG("Try `makedumpfile --help' for more information.\n"); + goto out; + } + } + if (flag_debug) + message_level |= ML_PRINT_DEBUG_MSG; + + if (info->flag_excludevm && !info->working_dir) { + ERRMSG("Error: -%c requires --work-dir\n", OPT_EXCLUDE_UNUSED_VM); + ERRMSG("Try `makedumpfile --help' for more information\n"); + return COMPLETED; + } + + if (info->flag_show_usage) { + print_usage(); + return COMPLETED; + } + if (info->flag_show_version) { + show_version(); + return COMPLETED; + } + + if (elf_version(EV_CURRENT) == EV_NONE ) { + /* + * library out of date + */ + ERRMSG("Elf library out of date!\n"); + goto out; + } + if (info->flag_generate_vmcoreinfo) { + if (!check_param_for_generating_vmcoreinfo(argc, argv)) { + MSG("Commandline parameter is invalid.\n"); + MSG("Try `makedumpfile --help' for more information.\n"); + goto out; + } + if (!open_files_for_generating_vmcoreinfo()) + goto out; + + if (info->name_xen_syms) { + if (!generate_vmcoreinfo_xen()) + goto out; + 
} else { + if (!generate_vmcoreinfo()) + goto out; + } + + if (!close_files_for_generating_vmcoreinfo()) + goto out; + + MSG("\n"); + MSG("The vmcoreinfo is saved to %s.\n", info->name_vmcoreinfo); + + } else if (info->flag_rearrange) { + if (!check_param_for_rearranging_dumpdata(argc, argv)) { + MSG("Commandline parameter is invalid.\n"); + MSG("Try `makedumpfile --help' for more information.\n"); + goto out; + } + if (!check_dump_file(info->name_dumpfile)) + goto out; + + if (!open_files_for_rearranging_dumpdata()) + goto out; + + if (!rearrange_dumpdata()) + goto out; + + if (!close_files_for_rearranging_dumpdata()) + goto out; + + MSG("\n"); + MSG("The dumpfile is saved to %s.\n", info->name_dumpfile); + } else if (info->flag_reassemble) { + if (!check_param_for_reassembling_dumpfile(argc, argv)) { + MSG("Commandline parameter is invalid.\n"); + MSG("Try `makedumpfile --help' for more information.\n"); + goto out; + } + if (!check_dump_file(info->name_dumpfile)) + goto out; + + if (!reassemble_dumpfile()) + goto out; + MSG("\n"); + MSG("The dumpfile is saved to %s.\n", info->name_dumpfile); + } else if (info->flag_dmesg) { + if (!check_param_for_creating_dumpfile(argc, argv)) { + MSG("Commandline parameter is invalid.\n"); + MSG("Try `makedumpfile --help' for more information.\n"); + goto out; + } + if (!check_dump_file(info->name_dumpfile)) + goto out; + if (!dump_dmesg()) + goto out; + + MSG("\n"); + MSG("The dmesg log is saved to %s.\n", info->name_dumpfile); + } else if (info->flag_mem_usage) { + if (!check_param_for_creating_dumpfile(argc, argv)) { + MSG("Commandline parameter is invalid.\n"); + MSG("Try `makedumpfile --help' for more information.\n"); + goto out; + } + if (!populate_kernel_version()) + goto out; + + if (info->kernel_version < KERNEL_VERSION(4, 11, 0) && + !info->flag_force) { + MSG("mem-usage not supported for this kernel.\n"); + MSG("You can try with -f if your kernel's kcore has valid p_paddr\n"); + goto out; + } + + if 
(!show_mem_usage()) + goto out; + } else { + if (!check_param_for_creating_dumpfile(argc, argv)) { + MSG("Commandline parameter is invalid.\n"); + MSG("Try `makedumpfile --help' for more information.\n"); + goto out; + } + if (info->flag_split) { + for (i = 0; i < info->num_dumpfile; i++) { + SPLITTING_FD_BITMAP(i) = -1; + if (!check_dump_file(SPLITTING_DUMPFILE(i))) + goto out; + } + } else { + if (!check_dump_file(info->name_dumpfile)) + goto out; + } + + if (!create_dumpfile()) + goto out; + + MSG("\n"); + if (info->flag_split) { + MSG("The dumpfiles are saved to "); + for (i = 0; i < info->num_dumpfile; i++) { + if (i != (info->num_dumpfile - 1)) + MSG("%s, ", SPLITTING_DUMPFILE(i)); + else + MSG("and %s.\n", SPLITTING_DUMPFILE(i)); + } + } else { + MSG("The dumpfile is saved to %s.\n", info->name_dumpfile); + } + } + retcd = COMPLETED; +out: + MSG("\n"); + if (retcd != COMPLETED) + MSG("makedumpfile Failed.\n"); + else if (!info->flag_mem_usage) + MSG("makedumpfile Completed.\n"); + + free_for_parallel(); + + if (info) { + if (info->dh_memory) + free(info->dh_memory); + if (info->kh_memory) + free(info->kh_memory); + if (info->valid_pages) + free(info->valid_pages); + if (info->bitmap_memory) { + if (info->bitmap_memory->buf) + free(info->bitmap_memory->buf); + free(info->bitmap_memory); + } + if (info->fd_memory >= 0) + close(info->fd_memory); + if (info->fd_dumpfile >= 0) + close(info->fd_dumpfile); + if (info->fd_bitmap >= 0) + close(info->fd_bitmap); + if (vt.node_online_map != NULL) + free(vt.node_online_map); + if (info->mem_map_data != NULL) + free(info->mem_map_data); + if (info->dump_header != NULL) + free(info->dump_header); + if (info->splitting_info != NULL) + free(info->splitting_info); + if (info->p2m_mfn_frame_list != NULL) + free(info->p2m_mfn_frame_list); + if (info->page_buf != NULL) + free(info->page_buf); + if (info->parallel_info != NULL) + free(info->parallel_info); + free(info); + + if (splitblock) { + if (splitblock->table) + 
free(splitblock->table); + free(splitblock); + } + } + free_elf_info(); + + return retcd; +} diff --git a/makedumpfile.conf b/makedumpfile.conf new file mode 100644 index 0000000..cd0e4ab --- /dev/null +++ b/makedumpfile.conf @@ -0,0 +1,149 @@ +## Filter config file +## +## Description: +## Configuration file to specify filter commands to filter out desired +## kernel data from vmcore. It supports erasing of symbol data and +## it's members of any data type. In case of filtering of data pointed by +## void * pointer, user needs to provide size to filter out. +## +## Please refer to manpage makedumpfile.conf(5) for more details. +## +## +## - Module section +## ========================================================= +## Syntax: +## [ModuleName] +## +## Define the module section where the symbols specified in subsequent erase +## commands belong to. The section name is a kernel module name (including +## vmlinux). The unnamed section defaults to [vmlinux] section. +## +## NOTE: There should not be any whitespaces before or after the ModuleName. +## +## e.g. +## [vmlinux] # Symbols in erase command belongs to main kernel (vmlinux) +## erase modules +## erase cred_jar.name +## erase cred_jar.name size 10 +## erase cred_jar.array +## erase vmlist.addr nullify +## +## [z90crypt] # Symbols in erase command belongs to kernel module z90crypt +## erase ap_device_list +## +## # erase entire CPRBX structure +## erase static_cprbx +## +## +## - To erase kernel data referred through a kernel Symbol +## ========================================================= +## Syntax: +## erase <Symbol>[.member[...]] [size <SizeValue>[K|M]] +## erase <Symbol>[.member[...]] [size <SizeSymbol>] +## erase <Symbol>[.member[...]] [nullify]] +## +## where +## <Symbol> +## A variable name from the kernel or module, which is part of +## global symbols '/proc/kallsyms'. +## <SizeValue> +## Integer value that specifies size of data to be erased. 
The +## suffixes 'K' and 'M' can be used to specify kilobytes and +## megabytes respectively where, K means 1024 bytes and M means +## 1024 ^ 2 = 1048576 bytes. +## <SizeSymbol> +## A simple expression of the form <Symbol>[.member[...]] that +## denotes a symbol which contains a positive integer value as a +## size of the data in bytes to be erased. +## +## Filter out the specified size of the data referred by symbol/member. +## If size option is not provided then the size of <Symbol> will be calculated +## according to its data type. For 'char *' data type, string length will be +## determined with an upper limit of 1024. +## +## If specified <Symbol> is of type 'void *', then user needs to provide +## either 'size' or 'nullify' option. Otherwise erase command will not have +## any effect. +## +## The option 'nullify' will work only if filter symbol/member is a pointer and +## is used to set NULL value to the pointer type symbol/member. +## +## NOTE: Please note that by nullifying pointer values will affect the +## debug ability of created DUMPFILE. Use 'nullify' option only when size of +## data to be filtered out is not known e.g. data pointed by 'void *'. +## +## e.g. +## [vmlinux] +## erase modules +## erase cred_jar.name +## erase cred_jar.name size 10 +## erase cred_jar.array +## erase vmlist.addr nullify +## +## +## - To filter kernel data referred through Array/list_head Symbol +## ================================================================= +## Syntax: +## for <id> in { <ArrayVar> | +## <StructVar> via <NextMember> | +## <ListHeadVar> within <StructName>:<ListHeadMember> } +## erase <id>[.MemberExpression] [size <SizeExpression>|nullify] +## [erase <id> ...] +## [...] +## endfor +## +## where +## <id> +## Arbitrary name used to temporarily point to elements of the +## list. Referred as iteration variable. +## <ArrayVar> +## A simple expression in the form of <Symbol>[.member[...]] that +## results into an array variable. 
+## <StructVar> +## A simple expression in the form of <Symbol>[.member[...]] that +## results into a variable that points to a structure. +## <NextMember> +## Member within <StructVar> that points to an object of same +## type that of <StructVar>. +## <ListHeadVar> +## A simple expression in the form of <Symbol>[.member[...]] that +## results into a variable of type struct list_head. +## <StructName> +## Name of the structure type that can be traversed using +## HEAD variable <ListHeadVar> and contains a member named +## <ListHeadMember>. +## <ListHeadMember> +## Name of a member in <StructName>, of type struct list_head. +## <MemberExpression> +## A simple expression in the form of [.member[...]] to specify a +## member or component of a member in <ArrayVar>, <StructVar> or +## <StructName>. +## <SizeExpression> +## One of the following: +## - An integer value. +## - <Symbol>[.member[...]] +## - <id>[.MemberExpresion] +## +## The <ArrayVar>, <StructVar> and <ListHeadVar> is also referred as LIST +## entry +## +## Filter out the specified size of the data accessible through LIST entries. +## e.g. +## [vmlinux] +## # Traversing <ListHeadVar> +## for m in modules.next within module:list +## erase m.holders_dir.name +## endfor +## # Traversing <ArrayVar> +## for lc in lowcore_ptr +## erase lc +## endfor +## # Traversing link-list +## for cj in cred_jar via slabp_cache +## erase cj.name +## endfor +## [z90crypt] +## for ap_dev in ap_device_list.next within ap_device:list +## erase ap_dev.reply.message size ap_dev.reply.length +## endfor +## diff --git a/makedumpfile.conf.5 b/makedumpfile.conf.5 new file mode 100644 index 0000000..b111019 --- /dev/null +++ b/makedumpfile.conf.5 @@ -0,0 +1,419 @@ +.TH MAKEDUMPFILE.CONF 5 "3 Jul 2018" "makedumpfile v1.6.4" "Linux System Administrator's Manual" +.SH NAME +makedumpfile.conf \- The filter configuration file for makedumpfile(8). +.SH DESCRIPTION +.PP +The makedumpfile.conf is a configuration file for makedumpfile tool. 
+makedumpfile.conf file contains the erase commands to filter out desired kernel +data from the vmcore while creating \fIDUMPFILE\fR using makedumpfile tool. +makedumpfile reads the filter config and builds the list of memory addresses +and its sizes after processing filter commands. The memory locations that +require to be filtered out are then poisoned with character \fIX\fR (58 in Hex). +.SH FILE FORMAT +.PP +The file consists of module sections that contains filter commands. A section +begins with the name of the section in square brackets and continues until the +next section begins. + +.br +"["<\fIModuleName\fR>"]" +.br +<\fIFilterCommands\fR> +.br + +where +.br +"[" is the character \fB[\fR +.br +"]" is the character \fB]\fR +.TP +<\fIModuleName\fR> +is either 'vmlinux' or name of a Linux kernel module. +.TP +<\fIFilterCommands\fR> +is a list of one or more filter commands as described in the section +\fBFILTER COMMANDS\fR of this manual page. +.PP +The section name indicates a kernel module name (including \fBvmlinux\fR) where +the symbols specified in subsequent erase commands belong to. The unnamed +section defaults to \fB[vmlinux]\fR section. However, a user can also explicitly +define \fB[vmlinux]\fR section. The sections help makedumpfile tool to select +appropriate kernel or module debuginfo file before processing the subsequent +erase commands. Before selecting appropriate debuginfo file, the module name +is validated against the loaded modules from the vmcore. If no match is found, +then the section is ignored and makedumpfile skips to the next module section. +If match is found, then makedumpfile will try to load the corresponding +module debuginfo file. If module debuginfo is not available then, makedumpfile +will skip the section with a warning message. +.SH FILTER COMMANDS +.SS filter command +.PP +A filter command is either an erase command or a loop construct. Each erase +command and loop construct must start with a new line. 
Each filter command +describes data in the dump to be erased. Syntax: + +.br +<\fIEraseCommands\fR>|<\fILoopConstruct\fR> +.br + +where +.TP +<\fIEraseCommands\fR> +Described in the subsection \fBerase command\fR of this manual page. +.TP +<\fILoopConstruct\fR> +Described in the subsection \fBLoop construct\fR of this manual page. +.SS erase command +.PP +Erase specified size of a kernel data referred by specified kernel/module +symbol or its member component. The erase command syntax is: + +.br +\fBerase\fR <\fISymbol\fR>[.\fImember\fR[...]] [\fBsize\fR +<\fISizeValue\fR>[K|M]] +.br +\fBerase\fR <\fISymbol\fR>[.\fImember\fR[...]] [\fBsize\fR <\fISizeSymbol\fR>] +.br +\fBerase\fR <\fISymbol\fR>[.\fImember\fR[...]] [\fBnullify\fR] +.br + +where +.br +.TP +<\fISymbol\fR> +A kernel or module symbol (variable) name that is part of global symbols +\fB/proc/kallsyms\fR. +.TP +<\fISizeValue\fR> +A positive integer value as a size of the data in bytes to be erased. The +suffixes 'K' and 'M' can be used to specify kilobytes and Megabytes +respectively where, K means 1024 bytes and M means 1024 ^ 2 = 1048576 bytes. +The suffixes are not case sensitive. +.TP +<\fISizeSymbol\fR> +A simple expression of the form <\fISymbol\fR>[.\fImember\fR[...]] that denotes +a symbol which contains a positive integer value as a size of the data in bytes +to be erased. +.TP +<\fISymbol\fR>[.\fImember\fR[...]] +A simple expression that results into either a global kernel symbol name or +its member components. The expression always uses '.' operator to specify +the \fImember\fR component of kernel symbol or its member irrespective of +whether it is of pointer type or not. +.TP +\fImember\fR[...] +Member or component of member in <\fISymbol\fR>. +.PP +The \fBerase\fR command takes two arguments 1. kernel symbol name or its +member components and 2. size of the data referred by argument (1) OR +\fBnullify\fR keyword. The second argument \fBsize\fR OR \fBnullify\fR is +optional. 
The unit for size value is in \fBbytes\fR. If \fBsize\fR option is +not specified then the size of the first argument is determined according to +its data type using dwarf info from debuginfo file. In case of '\fBchar *\fR' +data type, the length of string pointed by '\fBchar *\fR' pointer is determined +with an upper limit of 1024. The \fBsize\fR can be specified in two forms 1. +an integer value as explained above (<\fISizeValue\fR>) and 2. a simple +expression in the form of <\fISymbol\fR>[.\fImember\fR[...]] that results into +base type (integer) variable. +.PP +If the specified <\fISymbol\fR> is of type '\fBvoid *\fR', then user needs to +provide either \fBsize\fR or \fBnullify\fR option, otherwise the erase command +will not have any effect. +.PP +The \fBnullify\fR option only works if specified <\fISymbol\fR> is a pointer. +Instead of erasing data pointed by the specified pointer \fBnullify\fR erases +the pointer value and sets it to '0' (NULL). Please note that by nullifying +the pointer values may affect the debug ability of created \fIDUMPFILE\fR. +Use the \fBnullify\fR option only when the size of data to be erased is not +known. \fBe.g.\fR data pointed by '\fBvoid *\fR'. +.PP +Let us look at the makedumpfile.conf file from the example below which was +configured to erase desired kernel data from the kernel module with name +\fBmymodule\fR. At line 1 and 3, the user has not specified size option while +erasing 'array_var' and 'mystruct1.name' symbols. Instead the user depends on +makedumpfile to automatically determine the sizes to be erased \fBi.e\fR +100 bytes for 'array_var' and 11 bytes for 'mystruct1.name'. At line 2, +while erasing the 'mystruct1.buffer' member the user has specified the size +value 25 against the actual size of 50. In this case the user specified +\fBsize\fR takes the precedence and makedumpfile erases only 25 bytes from +\'mystruct1.buffer'. 
At line 4, the size of the data pointed by \fBvoid *\fR +pointer 'mystruct1.addr' is unknown. Hence the \fBnullify\fR option has been +specified to reset the pointer value to NULL. At line 5, the +\'mystruct2.addr_size' is specified as \fBsize\fR argument to determine the +size of the data pointed by \fBvoid *\fR pointer 'mystruct2.addr'. +.br + +.B Example: +.PP +Assuming the following piece of code is from kernel module 'mymodule': +.br + +struct s1 { +.br + char *name; +.br + void *addr1; +.br + void *addr2; +.br + char buffer[50]; +.br +}; +.br +struct s2 { +.br + void *addr; +.br + long addr_size; +.br +}; +.br + +/* Global symbols */ +.br +char array_var[100]; +.br +struct s1 mystruct1; +.br +struct s2 *mystruct2; +.br + +int foo() +.br +{ +.br + ... +.br + s1.name = "Hello World"; +.br + ... +.br +} +.br + +\fBmakedumpfile.conf:\fR +.br +[mymodule] +.br +erase array_var +.br +erase mystruct1.buffer size 25 +.br +erase mystruct1.name +.br +erase mystruct1.addr1 nullify +.br +# Assuming addr2 points to 1024 bytes +.br +erase mystruct1.addr2 size 1K +.br +erase mystruct2.addr size mystruct2.addr_size +.br +.B EOF + +.SS Loop construct +.PP +A Loop construct allows the user to traverse the linked list or array elements +and erase the data contents referred by each element. + +.br +\fBfor\fR <\fIid\fR> \fBin\fR {<\fIArrayVar\fR> | +.br + <\fIStructVar\fR> \fBvia\fR <\fINextMember\fR> | +.br + <\fIListHeadVar\fR> \fBwithin\fR +<\fIStructName\fR>\fB:\fR<\fIListHeadMember\fR>} +.br + \fBerase\fR <\fIid\fR>[.\fIMemberExpression\fR] +[\fBsize\fR <\fISizeExpression\fR>|\fBnullify\fR] +.br + [\fBerase\fR <\fIid\fR>...] +.br + [...] +.br +\fBendfor\fR +.PP +where +.PP +.TP +<\fIid\fR> +Arbitrary name used to temporarily point to elements of the list. This is +also called iteration variable. +.TP +<\fIArrayVar\fR> +A simple expression in the form of <\fISymbol\fR>[.\fImember\fR[...]] that +results into an array variable. 
+.TP +<\fIStructVar\fR> +A simple expression in the form of <\fISymbol\fR>[.\fImember\fR[...]] that +results into a variable that points to a structure. +.TP +<\fINextMember\fR> +Member within <\fIStructVar\fR> that points to an object of same type that of +<\fIStructVar\fR>. +.TP +<\fIListHeadVar\fR> +A simple expression in the form of <\fISymbol\fR>[.\fImember\fR[...]] that +results into a variable of type struct list_head. +.TP +<\fIStructName\fR> +Name of the structure type that can be traversed using HEAD variable +<\fIListHeadVar\fR> and contains a member named <\fIListHeadMember\fR>. +.TP +<\fIListHeadMember\fR> +Name of a member in <\fIStructName\fR>, of type struct list_head. +.TP +<\fIMemberExpression\fR> +A simple expression in the form of [.\fImember\fR[...]] to specify a member +or component of an element in <\fIArrayVar\fR>, <\fIStructVar\fR> +or <\fIStructName\fR>. +.TP +<\fISizeExpression\fR> +Size value in the form of <\fISizeValue\fR>, <\fIid\fR>[.\fIMemberExpression\fR] +or <\fISymbol\fR>[.\fImember\fR[...]]. +.PP +The \fBfor\fR loop construct allows to iterate on list of elements in an array +or linked lists. Each element in the list is assigned to iteration variable +<\fIid\fR>. The type of the iteration variable is determined by that of the +list elements. The entry specified after '\fBin\fR' terminal is called LIST +entry. The LIST entry can be an array variable, structure variable/pointer or a +struct list_head type variable. The set of \fBerase\fR commands specified +between \fBfor\fR and \fBendfor\fR, will be executed for each element in the +LIST entry. +.PP +If the LIST entry specified is an array variable, then the loop will be +executed for each array element. The size of the array will be determined by +using dwarf information. +.PP +If the LIST entry specified is a structure variable/pointer, then a traversal +member (<\fINextMember\fR>) must be specified using '\fBvia\fR' terminal. 
The +\fBfor\fR loop will continue until the value of traversal member is NULL or +matches with address of the first node <\fIStructVar\fR> if it is a circular +linked list. +.PP +If the LIST entry is specified using a struct list_head type variable, then +\fBwithin\fR terminal must be used to specify the structure name +<\fIStructName\fR> that is surrounding to it along with the struct list_head +type member after '\fB:\fR' which is part of the linked list. In the erase +statement <\fIid\fR> then denotes the structure that the list_head is +contained in (ELEMENT_OF). +.PP +The below example illustrates how to use loop construct for traversing +Array, linked list via next member and list_head. + +.B Example: +.PP +Assuming following piece of code is from kernel module 'mymodule': +.br + +struct s1 { +.br + struct *next; +.br + struct list_head list; +.br + char private[100]; +.br + void *key; +.br + long key_size; +.br +}; +.br + +/* Global symbols */ +.br +struct s1 mystruct1; +.br +static LIST_HEAD(s1_list_head); +.br +struct s1 myarray[100]; +.br + +void foo() +.br +{ +.br + struct s1 *s1_ptr; +.br + ... +.br + ... +.br + s1_ptr = malloc(...); +.br + ... +.br + ... +.br + list_add(&s1_ptr->list, &s1_list_head); +.br + ... +.br +} +.br + +\fBmakedumpfile.conf:\fR +.br +[mymodule] +.br +# erase private fields from list starting with mystruct1 connected via +.br +# 'next' member: +.br +for mys1 in mystruct1 via next +.br + erase mys1.private +.br + erase mys1.key size mys1.key_size +.br +endfor +.br + +# erase private fields from list starting with list_head variable +.br +# s1_list_head. 
+.br +for mys1 in s1_list_head.next within s1:list +.br + erase mys1.private +.br + erase mys1.key size mys1.key_size +.br +endfor +.br + +# erase private fields from all elements of the array myarray: +.br +for mys1 in myarray +.br + erase mys1.private +.br + erase mys1.key size mys1.key_size +.br +endfor +.br +.B EOF +.PP +In the above example, the first \fBfor\fR construct traverses the linked list +through a specified structure variable \fBmystruct1\fR of type \fBstruct s1\fR. +The linked list can be traversed using '\fBnext\fR' member of \fBmystruct1\fR. +Hence a \fBvia\fR terminal has been used to specify the traversal member +name '\fBnext\fR'. +.PP +The second \fBfor\fR construct traverses the linked list through a specified +struct list_head variable \fBs1_list_head.next\fR. The global symbol +\fBs1_list_head\fR is a start address of the linked list and its \fBnext\fR +member points to the address of struct list_head type member '\fBlist\fR' from +\fBstruct s1\fR. Hence a \fBwithin\fR terminal is used to specify the structure +name '\fBs1\fR' that can be traversed using \fBs1_list_head.next\fR variable +along with the name of struct list_head type member '\fBlist\fR' which is part +of the linked list that starts from \fBs1_list_head\fR global symbol. +.PP +The third \fBfor\fR construct traverses the array elements specified through +a array variable \fBmyarray\fR. +.br +.SH SEE ALSO +.PP +makedumpfile(8) + diff --git a/makedumpfile.h b/makedumpfile.h new file mode 100644 index 0000000..5ff94b8 --- /dev/null +++ b/makedumpfile.h @@ -0,0 +1,2370 @@ +/* + * makedumpfile.h + * + * Copyright (C) 2006, 2007, 2008, 2009, 2011 NEC Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#ifndef _MAKEDUMPFILE_H +#define _MAKEDUMPFILE_H + +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <gelf.h> +#include <sys/stat.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <time.h> +#include <sys/utsname.h> +#include <sys/wait.h> +#include <zlib.h> +#include <libelf.h> +#include <byteswap.h> +#include <getopt.h> +#include <sys/mman.h> +#ifdef USELZO +#include <lzo/lzo1x.h> +#endif +#ifdef USESNAPPY +#include <snappy-c.h> +#endif +#include "common.h" +#include "dwarf_info.h" +#include "diskdump_mod.h" +#include "print_info.h" +#include "sadump_mod.h" +#include <pthread.h> +#include <semaphore.h> +#include <inttypes.h> + +#define VMEMMAPSTART 0xffffea0000000000UL +#define BITS_PER_WORD 64 + +/* + * Result of command + */ +#define COMPLETED (0) +#define FAILED (1) +#define WRONG_RELEASE (2) /* utsname.release does not match. */ + +/* + * Type of memory management + */ +enum { + NOT_FOUND_MEMTYPE, + SPARSEMEM, + SPARSEMEM_EX, + DISCONTIGMEM, + FLATMEM +}; + +int get_mem_type(void); + +/* + * Page flags + * + * The flag values of page.flags have been defined by enum since linux-2.6.26. + * The following values are for linux-2.6.25 or former. 
+ */ +#define PG_lru_ORIGINAL (5) +#define PG_slab_ORIGINAL (7) +#define PG_private_ORIGINAL (11) /* Has something at ->private */ +#define PG_compound_ORIGINAL (14) /* Is part of a compound page */ +#define PG_swapcache_ORIGINAL (15) /* Swap page: swp_entry_t in private */ + +#define PAGE_BUDDY_MAPCOUNT_VALUE_v2_6_38 (-2) +#define PAGE_BUDDY_MAPCOUNT_VALUE_v2_6_39_to_latest_version (-128) + +#define PAGE_FLAGS_SIZE_v2_6_27_to_latest_version (4) + +#define PAGE_MAPPING_ANON (1) + +#define LSEEKED_BITMAP (1) +#define LSEEKED_PDESC (2) +#define LSEEKED_PDATA (3) + +/* + * Xen page flags + */ +#define BITS_PER_LONG (BITPERBYTE * sizeof(long)) +#define PG_shift(idx) (BITS_PER_LONG - (idx)) +#define PG_mask(x, idx) (x ## UL << PG_shift(idx)) + /* Cleared when the owning guest 'frees' this page. */ +#define PGC_allocated PG_mask(1, 1) + /* Page is Xen heap? */ +#define PGC_xen_heap PG_mask(1, 2) + /* Page is broken? */ +#define PGC_broken PG_mask(1, 7) + /* Mutually-exclusive page states: { inuse, offlining, offlined, free }. */ +#define PGC_state PG_mask(3, 9) +#define PGC_state_inuse PG_mask(0, 9) +#define PGC_state_offlining PG_mask(1, 9) +#define PGC_state_offlined PG_mask(2, 9) +#define PGC_state_free PG_mask(3, 9) +#define page_state_is(ci, st) (((ci)&PGC_state) == PGC_state_##st) + + /* Count of references to this frame. 
*/ +#define PGC_count_width PG_shift(9) +#define PGC_count_mask ((1UL<<PGC_count_width)-1) + +/* + * Memory flags + */ +#define MEMORY_PAGETABLE_4L (1 << 0) +#define MEMORY_PAGETABLE_3L (1 << 1) +#define MEMORY_X86_PAE (1 << 2) + +/* + * Type of address + */ +enum { + VADDR, + PADDR, + VADDR_XEN, +}; + +/* + * State of mmap(2) + */ +enum { + MMAP_DISABLE, + MMAP_TRY, + MMAP_ENABLE, +}; + +static inline int +test_bit(int nr, unsigned long addr) +{ + int mask; + + mask = 1 << (nr & 0x1f); + return ((mask & addr) != 0); +} + +#define isLRU(flags) test_bit(NUMBER(PG_lru), flags) +#define isPrivate(flags) test_bit(NUMBER(PG_private), flags) +#define isCompoundHead(flags) (!!((flags) & NUMBER(PG_head_mask))) +#define isSwapCache(flags) test_bit(NUMBER(PG_swapcache), flags) +#define isSwapBacked(flags) test_bit(NUMBER(PG_swapbacked), flags) +#define isHWPOISON(flags) (test_bit(NUMBER(PG_hwpoison), flags) \ + && (NUMBER(PG_hwpoison) != NOT_FOUND_NUMBER)) + +static inline int +isAnon(unsigned long mapping) +{ + return ((unsigned long)mapping & PAGE_MAPPING_ANON) != 0; +} + +#define PTOB(X) (((unsigned long long)(X)) << PAGESHIFT()) +#define BTOP(X) (((unsigned long long)(X)) >> PAGESHIFT()) + +#define PAGESIZE() (info->page_size) +#define PAGESHIFT() (info->page_shift) +#define PAGEOFFSET(X) (((unsigned long long)(X)) & (PAGESIZE() - 1)) +#define PAGEBASE(X) (((unsigned long long)(X)) & ~(PAGESIZE() - 1)) + +/* + * for SPARSEMEM + */ +#define SECTION_SIZE_BITS() (info->section_size_bits) +#define MAX_PHYSMEM_BITS() (info->max_physmem_bits) +#define PFN_SECTION_SHIFT() (SECTION_SIZE_BITS() - PAGESHIFT()) +#define PAGES_PER_SECTION() (1UL << PFN_SECTION_SHIFT()) +#define _SECTIONS_PER_ROOT() (1) +#define _SECTIONS_PER_ROOT_EXTREME() (info->page_size / SIZE(mem_section)) +#define SECTIONS_PER_ROOT() (info->sections_per_root) +#define SECTION_ROOT_MASK() (SECTIONS_PER_ROOT() - 1) +#define SECTION_NR_TO_ROOT(sec) ((sec) / SECTIONS_PER_ROOT()) +#define SECTION_MARKED_PRESENT 
(1UL<<0) +#define SECTION_IS_ONLINE (1UL<<2) +/* + * SECTION_MAP_LAST_BIT was 1UL<<2 before Linux 4.13.0. + * However, we always use the higher value, because: + * 1. at least one distributor backported commit 2d070eab2e82 to kernel + * version 4.12, + * 2. it has been verified that (1UL<<2) was never set, so it is + * safe to mask that bit off even in old kernels. + */ +#define SECTION_MAP_LAST_BIT (1UL<<3) +#define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1)) +#define NR_SECTION_ROOTS() divideup(num_section, SECTIONS_PER_ROOT()) +#define SECTION_NR_TO_PFN(sec) ((sec) << PFN_SECTION_SHIFT()) +#define SECTIONS_SHIFT() (MAX_PHYSMEM_BITS() - SECTION_SIZE_BITS()) +#define NR_MEM_SECTIONS() (1UL << SECTIONS_SHIFT()) + +/* + * Dump Level + */ +#define MIN_DUMP_LEVEL (0) +#define MAX_DUMP_LEVEL (31) +#define NUM_ARRAY_DUMP_LEVEL (MAX_DUMP_LEVEL + 1) /* enough to allocate + all the dump_level */ +#define DL_EXCLUDE_ZERO (0x001) /* Exclude Pages filled with Zeros */ +#define DL_EXCLUDE_CACHE (0x002) /* Exclude Cache Pages + without Private Pages */ +#define DL_EXCLUDE_CACHE_PRI (0x004) /* Exclude Cache Pages + with Private Pages */ +#define DL_EXCLUDE_USER_DATA (0x008) /* Exclude UserProcessData Pages */ +#define DL_EXCLUDE_FREE (0x010) /* Exclude Free Pages */ + + +/* + * For parse_line() + */ +#define NULLCHAR ('\0') +#define MAXARGS (100) /* max number of arguments to one function */ +#define LASTCHAR(s) (s[strlen(s)-1]) + +#define BITPERBYTE (8) +#define PGMM_CACHED (512) +#define PFN_EXCLUDED (256) +#define BUFSIZE (1024) +#define BUFSIZE_FGETS (1500) +#define BUFSIZE_BITMAP (4096) +#define PFN_BUFBITMAP (BITPERBYTE*BUFSIZE_BITMAP) +#define FILENAME_BITMAP "kdump_bitmapXXXXXX" +#define FILENAME_STDOUT "STDOUT" +#define MAP_REGION (4096*1024) + +/* + * Minimam vmcore has 2 ProgramHeaderTables(PT_NOTE and PT_LOAD). 
+ */ +#define MIN_ELF32_HEADER_SIZE \ + sizeof(Elf32_Ehdr)+sizeof(Elf32_Phdr)+sizeof(Elf32_Phdr) +#define MIN_ELF64_HEADER_SIZE \ + sizeof(Elf64_Ehdr)+sizeof(Elf64_Phdr)+sizeof(Elf64_Phdr) +#define MIN_ELF_HEADER_SIZE \ + MAX(MIN_ELF32_HEADER_SIZE, MIN_ELF64_HEADER_SIZE) +static inline int string_exists(char *s) { return (s ? TRUE : FALSE); } +#define STREQ(A, B) (string_exists((char *)A) && \ + string_exists((char *)B) && \ + (strcmp((char *)(A), (char *)(B)) == 0)) +#define STRNEQ(A, B)(string_exists((char *)(A)) && \ + string_exists((char *)(B)) && \ + (strncmp((char *)(A), (char *)(B), strlen((char *)(B))) == 0)) + +#define UCHAR(ADDR) *((unsigned char *)(ADDR)) +#define USHORT(ADDR) *((unsigned short *)(ADDR)) +#define UINT(ADDR) *((unsigned int *)(ADDR)) +#define ULONG(ADDR) *((unsigned long *)(ADDR)) +#define ULONGLONG(ADDR) *((unsigned long long *)(ADDR)) + + +/* + * for symbol + */ +#define INVALID_SYMBOL_DATA (ULONG_MAX) +#define SYMBOL(X) (symbol_table.X) +#define SYMBOL_INIT(symbol, str_symbol) \ +do { \ + SYMBOL(symbol) = get_symbol_addr(str_symbol); \ + if (SYMBOL(symbol) != NOT_FOUND_SYMBOL) \ + SYMBOL(symbol) += info->kaslr_offset; \ +} while (0) +#define SYMBOL_INIT_NEXT(symbol, str_symbol) \ +do { \ + SYMBOL(symbol) = get_next_symbol_addr(str_symbol); \ + if (SYMBOL(symbol) != NOT_FOUND_SYMBOL) \ + SYMBOL(symbol) += info->kaslr_offset; \ +} while (0) +#define WRITE_SYMBOL(str_symbol, symbol) \ +do { \ + if (SYMBOL(symbol) != NOT_FOUND_SYMBOL) { \ + fprintf(info->file_vmcoreinfo, "%s%llx\n", \ + STR_SYMBOL(str_symbol), SYMBOL(symbol)); \ + } \ +} while (0) +#define READ_SYMBOL(str_symbol, symbol) \ +do { \ + if (SYMBOL(symbol) == NOT_FOUND_SYMBOL) { \ + SYMBOL(symbol) = read_vmcoreinfo_symbol(STR_SYMBOL(str_symbol)); \ + if (SYMBOL(symbol) == INVALID_SYMBOL_DATA) \ + return FALSE; \ + if (info->read_text_vmcoreinfo && \ + (SYMBOL(symbol) != NOT_FOUND_SYMBOL) && \ + (SYMBOL(symbol) != INVALID_SYMBOL_DATA)) \ + SYMBOL(symbol) += info->kaslr_offset; \ 
+ } \ +} while (0) + +/* + * for structure + */ +#define SIZE(X) (size_table.X) +#define OFFSET(X) (offset_table.X) +#define ARRAY_LENGTH(X) (array_table.X) +#define SIZE_INIT(X, Y) \ +do { \ + if ((SIZE(X) = get_structure_size(Y, DWARF_INFO_GET_STRUCT_SIZE)) \ + == FAILED_DWARFINFO) \ + return FALSE; \ +} while (0) +#define TYPEDEF_SIZE_INIT(X, Y) \ +do { \ + if ((SIZE(X) = get_structure_size(Y, DWARF_INFO_GET_TYPEDEF_SIZE)) \ + == FAILED_DWARFINFO) \ + return FALSE; \ +} while (0) +#define ENUM_TYPE_SIZE_INIT(X, Y) \ +do { \ + if ((SIZE(X) = get_structure_size(Y, \ + DWARF_INFO_GET_ENUMERATION_TYPE_SIZE)) \ + == FAILED_DWARFINFO) \ + return FALSE; \ +} while (0) +#define OFFSET_INIT(X, Y, Z) \ +do { \ + if ((OFFSET(X) = get_member_offset(Y, Z, DWARF_INFO_GET_MEMBER_OFFSET)) \ + == FAILED_DWARFINFO) \ + return FALSE; \ +} while (0) +#define SYMBOL_ARRAY_LENGTH_INIT(X, Y) \ +do { \ + if ((ARRAY_LENGTH(X) = get_array_length(Y, NULL, DWARF_INFO_GET_SYMBOL_ARRAY_LENGTH)) == FAILED_DWARFINFO) \ + return FALSE; \ +} while (0) +#define SYMBOL_ARRAY_TYPE_INIT(X, Y) \ +do { \ + if ((ARRAY_LENGTH(X) = get_array_length(Y, NULL, DWARF_INFO_CHECK_SYMBOL_ARRAY_TYPE)) == FAILED_DWARFINFO) \ + return FALSE; \ +} while (0) +#define MEMBER_ARRAY_LENGTH_INIT(X, Y, Z) \ +do { \ + if ((ARRAY_LENGTH(X) = get_array_length(Y, Z, DWARF_INFO_GET_MEMBER_ARRAY_LENGTH)) == FAILED_DWARFINFO) \ + return FALSE; \ +} while (0) + +#define WRITE_STRUCTURE_SIZE(str_structure, structure) \ +do { \ + if (SIZE(structure) != NOT_FOUND_STRUCTURE) { \ + fprintf(info->file_vmcoreinfo, "%s%ld\n", \ + STR_SIZE(str_structure), SIZE(structure)); \ + } \ +} while (0) +#define WRITE_MEMBER_OFFSET(str_member, member) \ +do { \ + if (OFFSET(member) != NOT_FOUND_STRUCTURE) { \ + fprintf(info->file_vmcoreinfo, "%s%ld\n", \ + STR_OFFSET(str_member), OFFSET(member)); \ + } \ +} while (0) +#define WRITE_ARRAY_LENGTH(str_array, array) \ +do { \ + if (ARRAY_LENGTH(array) != NOT_FOUND_STRUCTURE) { \ + 
fprintf(info->file_vmcoreinfo, "%s%ld\n", \ + STR_LENGTH(str_array), ARRAY_LENGTH(array)); \ + } \ +} while (0) +#define READ_STRUCTURE_SIZE(str_structure, structure) \ +do { \ + if (SIZE(structure) == NOT_FOUND_STRUCTURE) { \ + SIZE(structure) = read_vmcoreinfo_long(STR_SIZE(str_structure)); \ + if (SIZE(structure) == INVALID_STRUCTURE_DATA) \ + return FALSE; \ + } \ +} while (0) +#define READ_MEMBER_OFFSET(str_member, member) \ +do { \ + if (OFFSET(member) == NOT_FOUND_STRUCTURE) { \ + OFFSET(member) = read_vmcoreinfo_long(STR_OFFSET(str_member)); \ + if (OFFSET(member) == INVALID_STRUCTURE_DATA) \ + return FALSE; \ + } \ +} while (0) +#define READ_ARRAY_LENGTH(str_array, array) \ +do { \ + if (ARRAY_LENGTH(array) == NOT_FOUND_STRUCTURE) { \ + ARRAY_LENGTH(array) = read_vmcoreinfo_long(STR_LENGTH(str_array)); \ + if (ARRAY_LENGTH(array) == INVALID_STRUCTURE_DATA) \ + return FALSE; \ + } \ +} while (0) + +/* + * for number + */ +#define NUMBER(X) (number_table.X) + +#define ENUM_NUMBER_INIT(number, str_number) \ +do {\ + NUMBER(number) = get_enum_number(str_number); \ + if (NUMBER(number) == FAILED_DWARFINFO) \ + return FALSE; \ +} while (0) +#define WRITE_NUMBER(str_number, number) \ +do { \ + if (NUMBER(number) != NOT_FOUND_NUMBER) { \ + fprintf(info->file_vmcoreinfo, "%s%ld\n", \ + STR_NUMBER(str_number), NUMBER(number)); \ + } \ +} while (0) +#define READ_NUMBER(str_number, number) \ +do { \ + if (NUMBER(number) == NOT_FOUND_NUMBER) { \ + NUMBER(number) = read_vmcoreinfo_long(STR_NUMBER(str_number)); \ + if (NUMBER(number) == INVALID_STRUCTURE_DATA) \ + return FALSE; \ + } \ +} while (0) +#define WRITE_NUMBER_UNSIGNED(str_number, number) \ +do { \ + if (NUMBER(number) != NOT_FOUND_NUMBER) { \ + fprintf(info->file_vmcoreinfo, "%s%lu\n", \ + STR_NUMBER(str_number), NUMBER(number)); \ + } \ +} while (0) +#define READ_NUMBER_UNSIGNED(str_number, number) \ +do { \ + if (NUMBER(number) == NOT_FOUND_NUMBER) { \ + NUMBER(number) = 
read_vmcoreinfo_ulong(STR_NUMBER(str_number)); \ + if (NUMBER(number) == INVALID_STRUCTURE_DATA) \ + return FALSE; \ + } \ +} while (0) + + +/* + * for source file name + */ +#define SRCFILE(X) (srcfile_table.X) +#define TYPEDEF_SRCFILE_INIT(decl_name, str_decl_name) \ +do { \ + get_source_filename(str_decl_name, SRCFILE(decl_name), DWARF_INFO_GET_TYPEDEF_SRCNAME); \ +} while (0) + +#define WRITE_SRCFILE(str_decl_name, decl_name) \ +do { \ + if (strlen(SRCFILE(decl_name))) { \ + fprintf(info->file_vmcoreinfo, "%s%s\n", \ + STR_SRCFILE(str_decl_name), SRCFILE(decl_name)); \ + } \ +} while (0) + +#define READ_SRCFILE(str_decl_name, decl_name) \ +do { \ + if (strlen(SRCFILE(decl_name)) == 0) { \ + if (!read_vmcoreinfo_string(STR_SRCFILE(str_decl_name), SRCFILE(decl_name))) \ + return FALSE; \ + } \ +} while (0) + +/* + * Macro for getting splitting info. + */ +#define SPLITTING_DUMPFILE(i) info->splitting_info[i].name_dumpfile +#define SPLITTING_FD_BITMAP(i) info->splitting_info[i].fd_bitmap +#define SPLITTING_START_PFN(i) info->splitting_info[i].start_pfn +#define SPLITTING_END_PFN(i) info->splitting_info[i].end_pfn +#define SPLITTING_OFFSET_EI(i) info->splitting_info[i].offset_eraseinfo +#define SPLITTING_SIZE_EI(i) info->splitting_info[i].size_eraseinfo + +/* + * Macro for getting parallel info. 
+ */ +#define FD_MEMORY_PARALLEL(i) info->parallel_info[i].fd_memory +#define FD_BITMAP_MEMORY_PARALLEL(i) info->parallel_info[i].fd_bitmap_memory +#define FD_BITMAP_PARALLEL(i) info->parallel_info[i].fd_bitmap +#define BUF_PARALLEL(i) info->parallel_info[i].buf +#define BUF_OUT_PARALLEL(i) info->parallel_info[i].buf_out +#define MMAP_CACHE_PARALLEL(i) info->parallel_info[i].mmap_cache +#define ZLIB_STREAM_PARALLEL(i) info->parallel_info[i].zlib_stream +#ifdef USELZO +#define WRKMEM_PARALLEL(i) info->parallel_info[i].wrkmem +#endif +/* + * kernel version + * + * NOTE: the format of kernel_version is as follows + * 8 bits major version + * 8 bits minor version + * 16 bits release + * so version 2.6.18 would be encoded as 0x02060012 + * These macros will let us decode that easier + */ +#define KVER_MAJ_SHIFT 24 +#define KVER_MIN_SHIFT 16 +#define KERNEL_VERSION(x,y,z) (((x) << KVER_MAJ_SHIFT) | ((y) << KVER_MIN_SHIFT) | (z)) +#define OLDEST_VERSION KERNEL_VERSION(2, 6, 15)/* linux-2.6.15 */ +#define LATEST_VERSION KERNEL_VERSION(4, 17, 0)/* linux-4.17.0 */ + +/* + * vmcoreinfo in /proc/vmcore + */ +#define VMCOREINFO_BYTES (4096) +#define FILENAME_VMCOREINFO "/tmp/vmcoreinfoXXXXXX" + +/* + * field name of vmcoreinfo file + */ +#define STR_OSRELEASE "OSRELEASE=" +#define STR_PAGESIZE "PAGESIZE=" +#define STR_CRASHTIME "CRASHTIME=" +#define STR_SYMBOL(X) "SYMBOL("X")=" +#define STR_SIZE(X) "SIZE("X")=" +#define STR_OFFSET(X) "OFFSET("X")=" +#define STR_LENGTH(X) "LENGTH("X")=" +#define STR_NUMBER(X) "NUMBER("X")=" +#define STR_SRCFILE(X) "SRCFILE("X")=" +#define STR_CONFIG_X86_PAE "CONFIG_X86_PAE=y" +#define STR_CONFIG_PGTABLE_4 "CONFIG_PGTABLE_4=y" +#define STR_CONFIG_PGTABLE_3 "CONFIG_PGTABLE_3=y" +#define STR_KERNELOFFSET "KERNELOFFSET=" + +/* + * common value + */ +#define NOSPACE (-1) /* code of write-error due to nospace */ +#define DEFAULT_ORDER (4) +#define TIMEOUT_STDIN (600) +#define SIZE_BUF_STDIN (4096) +#define STRLEN_OSRELEASE (65) /* same length as 
diskdump.h */ + +/* + * The value of dependence on machine + */ +#define PAGE_OFFSET (info->page_offset) +#define VMALLOC_START (info->vmalloc_start) +#define VMALLOC_END (info->vmalloc_end) +#define VMEMMAP_START (info->vmemmap_start) +#define VMEMMAP_END (info->vmemmap_end) +#define PMASK (0x7ffffffffffff000UL) + +#ifdef __aarch64__ +unsigned long get_kvbase_arm64(void); +#define KVBASE get_kvbase_arm64() +#endif /* aarch64 */ + +#ifdef __arm__ +#define KVBASE_MASK (0xffff) +#define KVBASE (SYMBOL(_stext) & ~KVBASE_MASK) +#define _SECTION_SIZE_BITS (28) +#define _MAX_PHYSMEM_BITS (32) +#define ARCH_PFN_OFFSET (info->phys_base >> PAGESHIFT()) + +#define PTRS_PER_PTE (512) +#define PGDIR_SHIFT (21) +#define PMD_SHIFT (21) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE - 1)) + +#define _PAGE_PRESENT (1 << 0) + +#endif /* arm */ + +#ifdef __x86__ +#define __PAGE_OFFSET (0xc0000000) +#define __VMALLOC_RESERVE (128 << 20) +#define MAXMEM (-PAGE_OFFSET-__VMALLOC_RESERVE) +#define KVBASE_MASK (0x7fffff) +#define KVBASE (SYMBOL(_stext) & ~KVBASE_MASK) +#define _SECTION_SIZE_BITS (26) +#define _SECTION_SIZE_BITS_PAE_ORIG (30) +#define _SECTION_SIZE_BITS_PAE_2_6_26 (29) +#define _MAX_PHYSMEM_BITS (32) +#define _MAX_PHYSMEM_BITS_PAE (36) + +#define PGDIR_SHIFT_3LEVEL (30) +#define PTRS_PER_PTE_3LEVEL (512) +#define PTRS_PER_PGD_3LEVEL (4) +#define PMD_SHIFT (21) /* only used by PAE translators */ +#define PTRS_PER_PMD (512) /* only used by PAE translators */ +#define PTE_SHIFT (12) /* only used by PAE translators */ +#define PTRS_PER_PTE (512) /* only used by PAE translators */ + +#define pgd_index_PAE(address) (((address) >> PGDIR_SHIFT_3LEVEL) & (PTRS_PER_PGD_3LEVEL - 1)) +#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) +#define pte_index(address) (((address) >> PTE_SHIFT) & (PTRS_PER_PTE - 1)) + +#define _PAGE_PRESENT (0x001) +#define _PAGE_PSE (0x080) + +/* Physical addresses are up to 52 bits (AMD64). 
+ * Mask off bits 52-62 (reserved) and bit 63 (NX). + */ +#define ENTRY_MASK (~0xfff0000000000fffULL) + +#endif /* x86 */ + +#ifdef __x86_64__ +#define __PAGE_OFFSET_ORIG (0xffff810000000000) /* 2.6.26, or former */ +#define __PAGE_OFFSET_2_6_27 (0xffff880000000000) /* 2.6.27, or later */ +#define __PAGE_OFFSET_5LEVEL (0xff10000000000000) /* 5-level page table */ + +#define VMALLOC_START_ORIG (0xffffc20000000000) /* 2.6.30, or former */ +#define VMALLOC_START_2_6_31 (0xffffc90000000000) /* 2.6.31, or later */ +#define VMALLOC_START_5LEVEL (0xffa0000000000000) /* 5-level page table */ +#define VMALLOC_END_ORIG (0xffffe1ffffffffff) /* 2.6.30, or former */ +#define VMALLOC_END_2_6_31 (0xffffe8ffffffffff) /* 2.6.31, or later */ +#define VMALLOC_END_5LEVEL (0xffd1ffffffffffff) /* 5-level page table */ + +#define VMEMMAP_START_ORIG (0xffffe20000000000) /* 2.6.30, or former */ +#define VMEMMAP_START_2_6_31 (0xffffea0000000000) /* 2.6.31, or later */ +#define VMEMMAP_START_5LEVEL (0xffd4000000000000) /* 5-level page table */ +#define VMEMMAP_END_ORIG (0xffffe2ffffffffff) /* 2.6.30, or former */ +#define VMEMMAP_END_2_6_31 (0xffffeaffffffffff) /* 2.6.31, or later */ +#define VMEMMAP_END_5LEVEL (0xffd5ffffffffffff) /* 5-level page table */ + +#define __START_KERNEL_map (0xffffffff80000000) +#define KVBASE PAGE_OFFSET +#define _SECTION_SIZE_BITS (27) +#define _MAX_PHYSMEM_BITS_ORIG (40) +#define _MAX_PHYSMEM_BITS_2_6_26 (44) +#define _MAX_PHYSMEM_BITS_2_6_31 (46) +#define _MAX_PHYSMEM_BITS_5LEVEL (52) + +/* + * 4 Levels paging + */ +#define PGD_SHIFT (39) +#define PUD_SHIFT (30) +#define PMD_SHIFT (21) +#define PTE_SHIFT (12) + +#define PTRS_PER_PGD (512) +#define PTRS_PER_PUD (512) +#define PTRS_PER_PMD (512) +#define PTRS_PER_PTE (512) + +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE - 1)) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE - 1)) + +/* + * 5 Levels paging + */ +#define PGD_SHIFT_5LEVEL (48) +#define P4D_SHIFT (39) + 
+#define PTRS_PER_PGD_5LEVEL (512) +#define PTRS_PER_P4D (512) + +#define pgd5_index(address) (((address) >> PGD_SHIFT_5LEVEL) & (PTRS_PER_PGD_5LEVEL - 1)) +#define pgd_index(address) (((address) >> PGD_SHIFT) & (PTRS_PER_PGD - 1)) +#define p4d_index(address) (((address) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) +#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) +#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) +#define pte_index(address) (((address) >> PTE_SHIFT) & (PTRS_PER_PTE - 1)) + +#define _PAGE_PRESENT (0x001) +#define _PAGE_PSE (0x080) /* 2MB or 1GB page */ + +#endif /* x86_64 */ + +#ifdef __powerpc64__ +#define __PAGE_OFFSET (0xc000000000000000) +#define KERNELBASE PAGE_OFFSET +#define VMALLOCBASE (0xD000000000000000) +#define KVBASE (SYMBOL(_stext)) +#define _SECTION_SIZE_BITS (24) +#define _MAX_PHYSMEM_BITS_ORIG (44) +#define _MAX_PHYSMEM_BITS_3_7 (46) +#define REGION_SHIFT (60UL) +#define VMEMMAP_REGION_ID (0xfUL) + +/* 4-level page table support */ + +/* 4K pagesize */ +#define PTE_INDEX_SIZE_L4_4K 9 +#define PMD_INDEX_SIZE_L4_4K 7 +#define PUD_INDEX_SIZE_L4_4K 7 +#define PGD_INDEX_SIZE_L4_4K 9 +#define PUD_INDEX_SIZE_L4_4K_3_7 9 +#define PTE_INDEX_SIZE_RADIX_4K 9 +#define PMD_INDEX_SIZE_RADIX_4K 9 +#define PUD_INDEX_SIZE_RADIX_4K 9 +#define PGD_INDEX_SIZE_RADIX_4K 13 +#define PTE_RPN_SHIFT_L4_4K 17 +#define PTE_RPN_SHIFT_L4_4K_4_5 18 +#define PGD_MASKED_BITS_4K 0 +#define PUD_MASKED_BITS_4K 0 +#define PMD_MASKED_BITS_4K 0 + +/* 64K pagesize */ +#define PTE_INDEX_SIZE_L4_64K 12 +#define PMD_INDEX_SIZE_L4_64K 12 +#define PUD_INDEX_SIZE_L4_64K 0 +#define PGD_INDEX_SIZE_L4_64K 4 +#define PTE_INDEX_SIZE_L4_64K_3_10 8 +#define PMD_INDEX_SIZE_L4_64K_3_10 10 +#define PGD_INDEX_SIZE_L4_64K_3_10 12 +#define PMD_INDEX_SIZE_L4_64K_4_6 5 +#define PUD_INDEX_SIZE_L4_64K_4_6 5 +#define PMD_INDEX_SIZE_L4_64K_4_12 10 +#define PUD_INDEX_SIZE_L4_64K_4_12 7 +#define PGD_INDEX_SIZE_L4_64K_4_12 8 +#define 
PUD_INDEX_SIZE_L4_64K_4_17 10 +#define PTE_INDEX_SIZE_RADIX_64K 5 +#define PMD_INDEX_SIZE_RADIX_64K 9 +#define PUD_INDEX_SIZE_RADIX_64K 9 +#define PGD_INDEX_SIZE_RADIX_64K 13 +#define PTE_RPN_SHIFT_L4_64K_V1 32 +#define PTE_RPN_SHIFT_L4_64K_V2 30 +#define PGD_MASKED_BITS_64K 0 +#define PUD_MASKED_BITS_64K 0x1ff +#define PMD_MASKED_BITS_64K 0x1ff +#define PMD_MASKED_BITS_64K_3_11 0xfff +#define PGD_MASKED_BITS_64K_4_6 0xc0000000000000ffUL +#define PUD_MASKED_BITS_64K_4_6 0xc0000000000000ffUL +#define PMD_MASKED_BITS_64K_4_6 0xc0000000000000ffUL + +#define PTE_RPN_MASK_DEFAULT 0xffffffffffffffffUL +#define PTE_RPN_SIZE_L4_4_6 (info->page_size == 65536 ? 41 : 45) +#define PTE_RPN_MASK_L4_4_6 (((1UL << PTE_RPN_SIZE_L4_4_6) - 1) << info->page_shift) +#define PTE_RPN_SHIFT_L4_4_6 info->page_shift + +#define PGD_MASKED_BITS_4_7 0xc0000000000000ffUL +#define PUD_MASKED_BITS_4_7 0xc0000000000000ffUL +#define PMD_MASKED_BITS_4_7 0xc0000000000000ffUL + +#define PTE_RPN_SIZE_L4_4_11 53 +#define PTE_RPN_MASK_L4_4_11 \ + (((1UL << PTE_RPN_SIZE_L4_4_11) - 1) & ~((1UL << info->page_shift) - 1)) +#define PTE_RPN_SHIFT_L4_4_11 info->page_shift + +/* + * Supported MMU types + */ +#define STD_MMU 0x0 +/* + * The flag bit for radix MMU in cpu_spec.mmu_features + * in the kernel. Use the same flag here. + */ +#define RADIX_MMU 0x40 + + +#define PGD_MASK_L4 \ + (info->kernel_version >= KERNEL_VERSION(3, 10, 0) ? (info->ptrs_per_pgd - 1) : 0x1ff) +#define PGD_OFFSET_L4(vaddr) ((vaddr >> (info->l4_shift)) & PGD_MASK_L4) + +#define PUD_OFFSET_L4(vaddr) \ + ((vaddr >> (info->l3_shift)) & (info->ptrs_per_l3 - 1)) + +#define PMD_OFFSET_L4(vaddr) \ + ((vaddr >> (info->l2_shift)) & (info->ptrs_per_l2 - 1)) + +#define _PAGE_PRESENT \ + (info->kernel_version >= KERNEL_VERSION(4, 6, 0) ? \ + (0x1UL << 63) : (info->kernel_version >= KERNEL_VERSION(4, 5, 0) ? 
\ + 0x2UL : 0x1UL)) + +#endif + +#ifdef __powerpc32__ + +#define __PAGE_OFFSET (0xc0000000) +#define KERNELBASE PAGE_OFFSET +#define VMALL_START (info->vmalloc_start) +#define KVBASE (SYMBOL(_stext)) +#define _SECTION_SIZE_BITS (24) +#define _MAX_PHYSMEM_BITS (44) + +#endif + +#ifdef __s390x__ +#define __PAGE_OFFSET (info->page_size - 1) +#define KERNELBASE (0) +#define KVBASE KERNELBASE +#define _SECTION_SIZE_BITS (28) +#define _MAX_PHYSMEM_BITS_ORIG (42) +#define _MAX_PHYSMEM_BITS_3_3 (46) + +/* Bits in the segment/region table address-space-control-element */ +#define _ASCE_TYPE_MASK 0x0c +#define _ASCE_TABLE_LENGTH 0x03 /* region table length */ + +#define TABLE_LEVEL(x) (((x) & _ASCE_TYPE_MASK) >> 2) +#define TABLE_LENGTH(x) ((x) & _ASCE_TABLE_LENGTH) + +/* Bits in the region table entry */ +#define _REGION_ENTRY_ORIGIN ~0xfffUL /* region table origin*/ +#define _REGION_ENTRY_TYPE_MASK 0x0c /* region table type mask */ +#define _REGION_ENTRY_INVALID 0x20 /* invalid region table entry */ +#define _REGION_ENTRY_LENGTH 0x03 /* region table length */ +#define _REGION_ENTRY_LARGE 0x400 +#define _REGION_OFFSET_MASK 0x7ffUL /* region/segment table offset mask */ + +#define RSG_TABLE_LEVEL(x) (((x) & _REGION_ENTRY_TYPE_MASK) >> 2) +#define RSG_TABLE_LENGTH(x) ((x) & _REGION_ENTRY_LENGTH) + +/* Bits in the segment table entry */ +#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL +#define _SEGMENT_ENTRY_LARGE 0x400 +#define _SEGMENT_ENTRY_CO 0x100 +#define _SEGMENT_PAGE_SHIFT 31 +#define _SEGMENT_INDEX_SHIFT 20 + +/* Hardware bits in the page table entry */ +#define _PAGE_ZERO 0x800 /* Bit pos 52 must conatin zero */ +#define _PAGE_INVALID 0x400 /* HW invalid bit */ +#define _PAGE_INDEX_SHIFT 12 +#define _PAGE_OFFSET_MASK 0xffUL /* page table offset mask */ + +#endif /* __s390x__ */ + +#ifdef __ia64__ /* ia64 */ +#define REGION_SHIFT (61) + +#define KERNEL_CACHED_REGION (7) +#define KERNEL_UNCACHED_REGION (6) +#define KERNEL_VMALLOC_REGION (5) +#define USER_STACK_REGION (4) 
#define USER_DATA_REGION	(3)
#define USER_TEXT_REGION	(2)
#define USER_SHMEM_REGION	(1)
#define USER_IA32_EMUL_REGION	(0)

/* Region base = region id shifted into the top bits (see REGION_SHIFT). */
#define KERNEL_CACHED_BASE	((unsigned long)KERNEL_CACHED_REGION << REGION_SHIFT)
#define KERNEL_UNCACHED_BASE	((unsigned long)KERNEL_UNCACHED_REGION << REGION_SHIFT)
#define KERNEL_VMALLOC_BASE	((unsigned long)KERNEL_VMALLOC_REGION << REGION_SHIFT)

#define KVBASE			KERNEL_VMALLOC_BASE
#define _PAGE_SIZE_64M		(26)	/* 2^26 = 64MB kernel translation page */
#define KERNEL_TR_PAGE_SIZE	(1 << _PAGE_SIZE_64M)
#define KERNEL_TR_PAGE_MASK	(~(KERNEL_TR_PAGE_SIZE - 1))
#define DEFAULT_PHYS_START	(KERNEL_TR_PAGE_SIZE * 1)
#define _SECTION_SIZE_BITS	(30)
#define _MAX_PHYSMEM_BITS	(50)

/*
 * 3 Levels paging
 */
#define _PAGE_PPN_MASK		(((1UL << _MAX_PHYSMEM_BITS) - 1) & ~0xfffUL)
#define PTRS_PER_PTD_SHIFT	(PAGESHIFT() - 3)

#define PMD_SHIFT		(PAGESHIFT() + PTRS_PER_PTD_SHIFT)
#define PGDIR_SHIFT_3L		(PMD_SHIFT + PTRS_PER_PTD_SHIFT)

/* Per-level masks for slicing a virtual address during a table walk. */
#define MASK_POFFSET	((1UL << PAGESHIFT()) - 1)
#define MASK_PTE	((1UL << PMD_SHIFT) - 1) &~((1UL << PAGESHIFT()) - 1)
#define MASK_PMD	((1UL << PGDIR_SHIFT_3L) - 1) &~((1UL << PMD_SHIFT) - 1)
#define MASK_PGD_3L	((1UL << REGION_SHIFT) - 1) & (~((1UL << PGDIR_SHIFT_3L) - 1))

/*
 * 4 Levels paging
 */
#define PUD_SHIFT		(PMD_SHIFT + PTRS_PER_PTD_SHIFT)
#define PGDIR_SHIFT_4L		(PUD_SHIFT + PTRS_PER_PTD_SHIFT)

#define MASK_PUD	((1UL << REGION_SHIFT) - 1) & (~((1UL << PUD_SHIFT) - 1))
#define MASK_PGD_4L	((1UL << REGION_SHIFT) - 1) & (~((1UL << PGDIR_SHIFT_4L) - 1))

/*
 * Key for distinguishing PGTABLE_3L or PGTABLE_4L.
 */
#define STR_PUD_T_3L	"include/asm-generic/pgtable-nopud.h"
#define STR_PUD_T_4L	"include/asm/page.h"

#endif /* ia64 */

#ifdef __sparc64__

#define KVBASE			(SYMBOL(_stext))
#define KVBASE_MASK		(0xffff)
#define _SECTION_SIZE_BITS	(30)
#define _MAX_PHYSMEM_BITS_L3	(49)
#define _MAX_PHYSMEM_BITS_L4	(53)
#define VMALLOC_START_SPARC64	(0x0000000100000000UL)
#define VMEMMAP_BASE_SPARC64	(0x0000010000000000UL)
#define VMEMMAP_CHUNK_SHIFT	(22)
#define VMEMMAP_CHUNK		(1UL << VMEMMAP_CHUNK_SHIFT)
#define VMEMMAP_CHUNK_MASK	(~(VMEMMAP_CHUNK - 1UL))

/* sparc64 uses fixed 8KB pages. */
#define PAGE_SHIFT	13
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

#define MAX_PHYS_ADDRESS_LOBITS	(41)
#define NR_CHUNKS_SHIFT		(MAX_PHYS_ADDRESS_LOBITS - PAGE_SHIFT + 6)
#define NR_CHUNKS_MASK		(~((1UL << NR_CHUNKS_SHIFT) - 1))

#define PMD_SHIFT	(PAGE_SHIFT + (PAGE_SHIFT - 3))
#define PMD_SIZE	(1UL << PMD_SHIFT)
#define PMD_MASK	(~(PMD_SIZE - 1))
#define PMD_BITS	(PAGE_SHIFT - 3)

#define PUD_SHIFT	(PMD_SHIFT + PMD_BITS)
#define PUD_SIZE	(1UL << PUD_SHIFT)
#define PUD_MASK	(~(PUD_SIZE - 1))
#define PUD_BITS	(PAGE_SHIFT - 3)

#define PGDIR_SHIFT_L4	(PUD_SHIFT + PUD_BITS)
#define PGDIR_SIZE_L4	(1UL << PGDIR_SHIFT_L4)
#define PGDIR_MASK_L4	(~(PGDIR_SIZE_L4 - 1))

#define PGDIR_SHIFT_L3	(PMD_SHIFT + PMD_BITS)
#define PGDIR_SIZE_L3	(1UL << PGDIR_SHIFT_L3)
#define PGDIR_MASK_L3	(~(PGDIR_SIZE_L3 - 1))

#define PGDIR_BITS	(PAGE_SHIFT - 3)

/* Each table level holds 2^(PAGE_SHIFT - 3) 8-byte entries. */
#define PTRS_PER_PTE	(1UL << (PAGE_SHIFT - 3))
#define PTRS_PER_PMD	(1UL << PMD_BITS)
#define PTRS_PER_PUD	(1UL << PUD_BITS)
#define PTRS_PER_PGD	(1UL << PGDIR_BITS)

#define _PAGE_PMD_HUGE		(0x0100000000000000UL)
#define _PAGE_PUD_HUGE		_PAGE_PMD_HUGE
#define _PAGE_PADDR_4V		(0x00FFFFFFFFFFE000UL)
#define _PAGE_PRESENT_4V	(0x0000000000000010UL)

/* Page-table entries at every level are plain 64-bit words on sparc64. */
typedef unsigned long pte_t;
typedef unsigned long pmd_t;
typedef unsigned long pud_t;
typedef unsigned long pgd_t;

#define pud_none(pud)	(!(pud))
#define 
pgd_none(pgd) (!(pgd)) +#define pmd_none(pmd) (!(pmd)) + +#define pte_to_pa(pte) (pte & _PAGE_PADDR_4V) + +#define pgd_index_l4(addr) (((addr) >> PGDIR_SHIFT_L4) & (PTRS_PER_PGD - 1)) +#define pgd_offset_l4(pgdir,addr) ((unsigned long) \ + ((pgd_t *)pgdir + pgd_index_l4(addr))) + +#define pgd_index_l3(addr) (((addr) >> PGDIR_SHIFT_L3) & (PTRS_PER_PGD - 1)) +#define pgd_offset_l3(pgdir,addr) ((unsigned long) \ + ((pgd_t *)pgdir + pgd_index_l3(addr))) + +#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) +#define pud_offset(pgdp, addr) ((unsigned long) \ + ((pud_t *)pgdp + pud_index(addr))) +#define pud_large(pud) (pud & _PAGE_PUD_HUGE) + +#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) +#define pmd_offset(pudp, addr) ((unsigned long) \ + ((pmd_t *)pudp + pmd_index(addr))) +#define pmd_large(pmd) (pmd & _PAGE_PMD_HUGE) + +#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_offset(pmdp, addr) ((unsigned long) \ + ((pte_t *)(pte_to_pa(pmdp) + pte_index(addr)))) +#define pte_present(pte) (pte & _PAGE_PRESENT_4V) + +#endif /* sparc64 */ + +/* + * The function of dependence on machine + */ +static inline int stub_true() { return TRUE; } +static inline int stub_true_ul(unsigned long x) { return TRUE; } +static inline int stub_false() { return FALSE; } +#ifdef __aarch64__ +int get_phys_base_arm64(void); +int get_machdep_info_arm64(void); +unsigned long long vaddr_to_paddr_arm64(unsigned long vaddr); +int get_versiondep_info_arm64(void); +int get_xen_basic_info_arm64(void); +int get_xen_info_arm64(void); +#define find_vmemmap() stub_false() +#define vaddr_to_paddr(X) vaddr_to_paddr_arm64(X) +#define get_phys_base() get_phys_base_arm64() +#define get_machdep_info() get_machdep_info_arm64() +#define get_versiondep_info() get_versiondep_info_arm64() +#define get_kaslr_offset(X) stub_false() +#define get_xen_basic_info_arch(X) get_xen_basic_info_arm64(X) +#define get_xen_info_arch(X) get_xen_info_arm64(X) 
#define is_phys_addr(X)		stub_true_ul(X)
#define arch_crashkernel_mem_size()	stub_false()
#endif /* aarch64 */

/* arm: map the generic hooks onto the 32-bit ARM implementations. */
#ifdef __arm__
int get_phys_base_arm(void);
int get_machdep_info_arm(void);
unsigned long long vaddr_to_paddr_arm(unsigned long vaddr);
#define find_vmemmap()		stub_false()
#define get_phys_base()		get_phys_base_arm()
#define get_machdep_info()	get_machdep_info_arm()
#define get_versiondep_info()	stub_true()
#define get_kaslr_offset(X)	stub_false()
#define vaddr_to_paddr(X)	vaddr_to_paddr_arm(X)
#define is_phys_addr(X)		stub_true_ul(X)
#define arch_crashkernel_mem_size()	stub_false()
#endif /* arm */

/* x86 (32-bit): phys_base is not needed, so get_phys_base() is a stub. */
#ifdef __x86__
int get_machdep_info_x86(void);
int get_versiondep_info_x86(void);
unsigned long long vaddr_to_paddr_x86(unsigned long vaddr);
#define find_vmemmap()		stub_false()
#define get_phys_base()		stub_true()
#define get_machdep_info()	get_machdep_info_x86()
#define get_versiondep_info()	get_versiondep_info_x86()
#define get_kaslr_offset(X)	stub_false()
#define vaddr_to_paddr(X)	vaddr_to_paddr_x86(X)
#define is_phys_addr(X)		stub_true_ul(X)
#define arch_crashkernel_mem_size()	stub_false()
#endif /* x86 */

/*
 * x86_64: the only architecture here implementing all of find_vmemmap(),
 * get_kaslr_offset() and a 4-level translator (vtop4_x86_64).
 */
#ifdef __x86_64__
unsigned long get_kaslr_offset_x86_64(unsigned long vaddr);
int get_phys_base_x86_64(void);
int get_machdep_info_x86_64(void);
int get_versiondep_info_x86_64(void);
unsigned long long vtop4_x86_64(unsigned long vaddr);
unsigned long long vtop4_x86_64_pagetable(unsigned long vaddr, unsigned long pagetable);
#define find_vmemmap()		find_vmemmap_x86_64()
#define get_phys_base()		get_phys_base_x86_64()
#define get_machdep_info()	get_machdep_info_x86_64()
#define get_versiondep_info()	get_versiondep_info_x86_64()
#define get_kaslr_offset(X)	get_kaslr_offset_x86_64(X)
#define vaddr_to_paddr(X)	vtop4_x86_64(X)
#define is_phys_addr(X)		stub_true_ul(X)
#define arch_crashkernel_mem_size()	stub_false()
#endif /* x86_64 */

#ifdef __powerpc64__ /* powerpc64 */
int get_machdep_info_ppc64(void);
int 
get_versiondep_info_ppc64(void);
unsigned long long vaddr_to_paddr_ppc64(unsigned long vaddr);
int arch_crashkernel_mem_size_ppc64(void);
#define find_vmemmap()		stub_false()
#define get_phys_base()		stub_true()
#define get_machdep_info()	get_machdep_info_ppc64()
#define get_versiondep_info()	get_versiondep_info_ppc64()
#define get_kaslr_offset(X)	stub_false()
#define vaddr_to_paddr(X)	vaddr_to_paddr_ppc64(X)
#define is_phys_addr(X)		stub_true_ul(X)
/* ppc64 is the only table with a real arch_crashkernel_mem_size(). */
#define arch_crashkernel_mem_size()	arch_crashkernel_mem_size_ppc64()
#endif /* powerpc64 */

#ifdef __powerpc32__ /* powerpc32 */
int get_machdep_info_ppc(void);
unsigned long long vaddr_to_paddr_ppc(unsigned long vaddr);
#define find_vmemmap()		stub_false()
#define get_phys_base()		stub_true()
#define get_machdep_info()	get_machdep_info_ppc()
#define get_versiondep_info()	stub_true()
#define get_kaslr_offset(X)	stub_false()
#define vaddr_to_paddr(X)	vaddr_to_paddr_ppc(X)
#define is_phys_addr(X)		stub_true_ul(X)
#define arch_crashkernel_mem_size()	stub_false()
#endif /* powerpc32 */

/* s390x: the only table with a non-trivial is_phys_addr() (iomem lookup). */
#ifdef __s390x__ /* s390x */
int get_machdep_info_s390x(void);
unsigned long long vaddr_to_paddr_s390x(unsigned long vaddr);
int is_iomem_phys_addr_s390x(unsigned long addr);
#define find_vmemmap()		stub_false()
#define get_phys_base()		stub_true()
#define get_machdep_info()	get_machdep_info_s390x()
#define get_versiondep_info()	stub_true()
#define get_kaslr_offset(X)	stub_false()
#define vaddr_to_paddr(X)	vaddr_to_paddr_s390x(X)
#define is_phys_addr(X)		is_iomem_phys_addr_s390x(X)
#define arch_crashkernel_mem_size()	stub_false()
#endif /* s390x */

#ifdef __ia64__ /* ia64 */
int get_phys_base_ia64(void);
int get_machdep_info_ia64(void);
unsigned long long vaddr_to_paddr_ia64(unsigned long vaddr);
#define find_vmemmap()		stub_false()
#define get_machdep_info()	get_machdep_info_ia64()
#define get_phys_base()		get_phys_base_ia64()
#define get_versiondep_info()	stub_true()
#define get_kaslr_offset(X)	stub_false()
#define vaddr_to_paddr(X)	vaddr_to_paddr_ia64(X)
/* Extract the 3-bit region id from an ia64 virtual address. */
#define VADDR_REGION(X)		(((unsigned long)(X)) >> REGION_SHIFT)
#define is_phys_addr(X)		stub_true_ul(X)
#define arch_crashkernel_mem_size()	stub_false()
#endif /* ia64 */

/*
 * sparc64: note this table defines no get_kaslr_offset(X), unlike the
 * other architectures above.
 */
#ifdef __sparc64__ /* sparc64 */
int get_versiondep_info_sparc64(void);
int get_phys_base_sparc64(void);
unsigned long long vaddr_to_paddr_sparc64(unsigned long vaddr);
#define find_vmemmap()		stub_false()
#define get_machdep_info()	TRUE
#define get_phys_base()		get_phys_base_sparc64()
#define get_versiondep_info()	get_versiondep_info_sparc64()
#define vaddr_to_paddr(X)	vaddr_to_paddr_sparc64(X)
#define is_phys_addr(X)		stub_true_ul(X)
#define arch_crashkernel_mem_size()	stub_false()
#endif /* sparc64 */

/* Page frame numbers are 64-bit everywhere, independent of the host arch. */
typedef unsigned long long mdf_pfn_t;

#ifndef ARCH_PFN_OFFSET
#define ARCH_PFN_OFFSET	0
#endif
/* PFN <-> physical address conversion, honouring the arch PFN offset. */
#define paddr_to_pfn(X) \
	(((unsigned long long)(X) >> PAGESHIFT()) - ARCH_PFN_OFFSET)
#define pfn_to_paddr(X) \
	(((mdf_pfn_t)(X) + ARCH_PFN_OFFSET) << PAGESHIFT())

/* Format of Xen crash info ELF note */
typedef struct {
	unsigned long xen_major_version;
	unsigned long xen_minor_version;
	unsigned long xen_extra_version;
	unsigned long xen_changeset;
	unsigned long xen_compiler;
	unsigned long xen_compile_date;
	unsigned long xen_compile_time;
	unsigned long tainted;
} xen_crash_info_com_t;

typedef struct {
	xen_crash_info_com_t com;
#if defined(__x86__) || defined(__x86_64__)
	/* added by changeset 2b43fb3afb3e: */
	unsigned long dom0_pfn_to_mfn_frame_list_list;
#endif
#if defined(__ia64__)
	/* added by changeset d7c3b12014b3: */
	unsigned long dom0_mm_pgd_mfn;
#endif
} xen_crash_info_t;

/* changeset 439a3e9459f2 added xen_phys_start
 * to the middle of the struct...
 */
typedef struct {
	xen_crash_info_com_t com;
#if defined(__x86__) || defined(__x86_64__)
	unsigned long xen_phys_start;
	unsigned long dom0_pfn_to_mfn_frame_list_list;
#endif
#if defined(__ia64__)
	unsigned long dom0_mm_pgd_mfn;
#endif
} xen_crash_info_v2_t;

/* One contiguous run of page descriptors: [pfn_start, pfn_end) -> mem_map. */
struct mem_map_data {
	mdf_pfn_t pfn_start;
	mdf_pfn_t pfn_end;
	unsigned long mem_map;
};

/* A dump bitmap and the file/buffer state used to access it. */
struct dump_bitmap {
	int fd;
	int no_block;
	char *file_name;
	char *buf;
	off_t offset;
};

/* Buffered-write state for a file (buffer plus its file offset). */
struct cache_data {
	int fd;
	char *file_name;
	char *buf;
	size_t buf_size;
	size_t cache_size;
	off_t offset;
};
typedef unsigned long int ulong;
typedef unsigned long long int ulonglong;

/*
 * for parallel process
 */

#define PAGE_FLAG_NUM	(20)
#define PAGE_DATA_NUM	(5)
#define WAIT_TIME	(60 * 10)	/* 600 — units not shown here; presumably seconds */
#define PTHREAD_FAIL	((void *)-2)	/* sentinel thread return value */
#define THREAD_REGION	(200 * 1024)

/* One cached mmap window over the memory image: buffer + covered offsets. */
struct mmap_cache {
	char *mmap_buf;
	off_t mmap_start_offset;
	off_t mmap_end_offset;
};

/* States for page_data.flags (producer/consumer handoff between threads). */
enum {
	FLAG_UNUSED,
	FLAG_READY,
	FLAG_FILLING
};
/* Per-page bookkeeping node used by the parallel dump threads. */
struct page_flag {
	mdf_pfn_t pfn;
	char zero;
	char ready;
	short index;
	struct page_flag *next;
};

/* A (possibly compressed) page buffer exchanged between worker threads. */
struct page_data
{
	long size;
	unsigned char *buf;
	int flags;
	int used;
};

/* Arguments handed to each worker thread (struct cycle declared elsewhere). */
struct thread_args {
	int thread_num;
	unsigned long len_buf_out;
	struct cycle *cycle;
	struct page_data *page_data_buf;
	struct page_flag *page_flag_buf;
};

/*
 * makedumpfile header
 * For re-arranging the dump data on different architecture, all the
 * variables are defined by 64bits. The size of signature is aligned
 * to 64bits, and change the values to big endian.
+ */ +#define MAKEDUMPFILE_SIGNATURE "makedumpfile" +#define NUM_SIG_MDF (sizeof(MAKEDUMPFILE_SIGNATURE) - 1) +#define SIZE_SIG_MDF roundup(sizeof(char) * NUM_SIG_MDF, 8) +#define SIG_LEN_MDF (SIZE_SIG_MDF / sizeof(char)) +#define MAX_SIZE_MDF_HEADER (4096) /* max size of makedumpfile_header */ +#define TYPE_FLAT_HEADER (1) /* type of flattened format */ +#define VERSION_FLAT_HEADER (1) /* current version of flattened format */ +#define END_FLAG_FLAT_HEADER (-1) + +struct makedumpfile_header { + char signature[SIG_LEN_MDF]; /* = "makedumpfile" */ + int64_t type; + int64_t version; +}; + +struct makedumpfile_data_header { + int64_t offset; + int64_t buf_size; +}; + +struct splitting_info { + char *name_dumpfile; + int fd_bitmap; + mdf_pfn_t start_pfn; + mdf_pfn_t end_pfn; + off_t offset_eraseinfo; + unsigned long size_eraseinfo; +} splitting_info_t; + +struct parallel_info { + int fd_memory; + int fd_bitmap_memory; + int fd_bitmap; + unsigned char *buf; + unsigned char *buf_out; + struct mmap_cache *mmap_cache; + z_stream zlib_stream; +#ifdef USELZO + lzo_bytep wrkmem; +#endif +} parallel_info_t; + +struct ppc64_vmemmap { + unsigned long phys; + unsigned long virt; +}; + +struct DumpInfo { + int32_t kernel_version; /* version of first kernel*/ + struct timeval timestamp; + struct utsname system_utsname; + + /* + * General info: + */ + int dump_level; /* current dump level */ + int max_dump_level; /* maximum dump level */ + int num_dump_level; /* number of dump level */ + int array_dump_level[NUM_ARRAY_DUMP_LEVEL]; + int flag_compress; /* flag of compression */ + int flag_lzo_support; /* flag of LZO compression support */ + int flag_elf_dumpfile; /* flag of creating ELF dumpfile */ + int flag_generate_vmcoreinfo;/* flag of generating vmcoreinfo file */ + int flag_read_vmcoreinfo; /* flag of reading vmcoreinfo file */ + int flag_show_usage; /* flag of showing usage */ + int flag_show_version; /* flag of showing version */ + int flag_flatten; /* flag of outputting 
flattened + format to a standard out */ + int flag_rearrange; /* flag of creating dumpfile from + flattened format */ + int flag_split; /* splitting vmcore */ + int flag_cyclic; /* multi-cycle processing is necessary */ + int flag_usemmap; /* /proc/vmcore supports mmap(2) */ + int flag_reassemble; /* reassemble multiple dumpfiles into one */ + int flag_refiltering; /* refilter from kdump-compressed file */ + int flag_force; /* overwrite existing stuff */ + int flag_exclude_xen_dom;/* exclude Domain-U from xen-kdump */ + int flag_dmesg; /* dump the dmesg log out of the vmcore file */ + int flag_partial_dmesg; /* dmesg dump only from the last cleared index*/ + int flag_mem_usage; /*show the page number of memory in different use*/ + int flag_use_printk_log; /* did we read printk_log symbol name? */ + int flag_nospace; /* the flag of "No space on device" error */ + int flag_vmemmap; /* kernel supports vmemmap address space */ + int flag_excludevm; /* -e - excluding unused vmemmap pages */ + int flag_use_count; /* _refcount is named _count in struct page */ + unsigned long vaddr_for_vtop; /* virtual address for debugging */ + long page_size; /* size of page */ + long page_shift; + mdf_pfn_t max_mapnr; /* number of page descriptor */ + unsigned long page_offset; + unsigned long section_size_bits; + unsigned long max_physmem_bits; + unsigned long sections_per_root; + unsigned long phys_base; + unsigned long kernel_start; + unsigned long vmalloc_start; + unsigned long vmalloc_end; + unsigned long vmemmap_start; + unsigned long vmemmap_end; + int vmemmap_psize; + int vmemmap_cnt; + struct ppc64_vmemmap *vmemmap_list; + unsigned long kaslr_offset; + + /* + * page table info for ppc64 + */ + int cur_mmu_type; + int ptrs_per_pgd; + uint l4_index_size; + uint l3_index_size; + uint l2_index_size; + uint l1_index_size; + uint ptrs_per_l4; + uint ptrs_per_l3; + uint ptrs_per_l2; + uint ptrs_per_l1; + uint l4_shift; + uint l3_shift; + uint l2_shift; + uint l1_shift; + uint 
pte_rpn_shift; + ulong pte_rpn_mask; + ulong pgd_masked_bits; + ulong pud_masked_bits; + ulong pmd_masked_bits; + ulong kernel_pgd; + char *page_buf; /* Page buffer to read page tables */ + + /* + * Filter config file containing filter commands to filter out kernel + * data from vmcore. + */ + char *name_filterconfig; + FILE *file_filterconfig; + + /* + * Filter config file containing eppic language filtering rules + * to filter out kernel data from vmcore + */ + char *name_eppic_config; + + /* + * diskdimp info: + */ + int block_order; + off_t offset_bitmap1; + unsigned long len_bitmap; /* size of bitmap(1st and 2nd) */ + struct dump_bitmap *bitmap1; + struct dump_bitmap *bitmap2; + struct disk_dump_header *dump_header; + struct kdump_sub_header sub_header; + + /* + * ELF header info: + */ + unsigned int num_load_dumpfile; + size_t offset_load_dumpfile; + + /* + * mem_map info: + */ + unsigned int num_mem_map; + struct mem_map_data *mem_map_data; + + int fd_vmlinux; + char *name_vmlinux; + + int fd_xen_syms; + char *name_xen_syms; + + /* + * Dump memory image info: + */ + int fd_memory; + char *name_memory; + struct disk_dump_header *dh_memory; + struct kdump_sub_header *kh_memory; + struct dump_bitmap *bitmap_memory; + unsigned long *valid_pages; + + /* + * Dump file info: + */ + int fd_dumpfile; + char *name_dumpfile; + int num_dumpfile; + struct splitting_info *splitting_info; + struct parallel_info *parallel_info; + + /* + * bitmap info: + */ + int fd_bitmap; + char *name_bitmap; + + /* + * vmcoreinfo file info: + */ + FILE *file_vmcoreinfo; + char *name_vmcoreinfo; /* vmcoreinfo file */ + char release[STRLEN_OSRELEASE]; + int read_text_vmcoreinfo; + + /* + * ELF NOTE section in dump memory image info: + */ + off_t offset_note_dumpfile; + + /* + * erased information in dump memory image info: + */ + unsigned long size_elf_eraseinfo; + + /* + * for Xen extraction + */ + union { /* Both versions of Xen crash info: */ + xen_crash_info_com_t *com; /* common fields 
*/ + xen_crash_info_t *v1; /* without xen_phys_start */ + xen_crash_info_v2_t *v2; /* changeset 439a3e9459f2 */ + } xen_crash_info; + int xen_crash_info_v; /* Xen crash info version: + * 0 .. xen_crash_info_com_t + * 1 .. xen_crash_info_t + * 2 .. xen_crash_info_v2_t */ + + mdf_pfn_t dom0_mapnr; /* The number of page in domain-0. + * Different from max_mapnr. + * max_mapnr is the number of page + * in system. */ + unsigned long xen_phys_start; + unsigned long xen_heap_start; /* start mfn of xen heap area */ + unsigned long xen_heap_end; /* end mfn(+1) of xen heap area */ + unsigned long frame_table_vaddr; + unsigned long max_page; + unsigned long alloc_bitmap; + unsigned long dom0; + unsigned long p2m_frames; + unsigned long *p2m_mfn_frame_list; + int num_domain; + struct domain_list *domain_list; +#if defined(__x86_64__) + unsigned long xen_virt_start; + unsigned long directmap_virt_end; +#endif + + /* + * for splitting + */ + mdf_pfn_t split_start_pfn; + mdf_pfn_t split_end_pfn; + + /* + * for cyclic processing + */ + char *working_dir; /* working directory for bitmap */ + mdf_pfn_t num_dumpable; + unsigned long bufsize_cyclic; + unsigned long pfn_cyclic; + + /* + * for mmap + */ + char *mmap_buf; + off_t mmap_start_offset; + off_t mmap_end_offset; + off_t mmap_region_size; + + /* + * sadump info: + */ + int flag_sadump_diskset; + enum sadump_format_type flag_sadump; /* sadump format type */ + /* + * for filtering free pages managed by buddy system: + */ + int (*page_is_buddy)(unsigned long flags, unsigned int _mapcount, + unsigned long private, unsigned int _count); + /* + * for cyclic_splitting mode, setup splitblock_size + */ + long long splitblock_size; + /* + * for parallel process + */ + int num_threads; + int num_buffers; + pthread_t **threads; + struct thread_args *kdump_thread_args; + struct page_data *page_data_buf; + struct page_flag **page_flag_buf; + sem_t page_flag_buf_sem; + pthread_rwlock_t usemmap_rwlock; + mdf_pfn_t current_pfn; + 
pthread_mutex_t current_pfn_mutex; + pthread_mutex_t page_data_mutex; + pthread_mutex_t filter_mutex; +}; +extern struct DumpInfo *info; + +/* + * for cyclic_splitting mode,Manage memory by splitblock + */ +#define DEFAULT_SPLITBLOCK_SIZE (1LL << 30) + +struct SplitBlock { + char *table; + long long num; + long long page_per_splitblock; + int entry_size; /* counted by byte */ +}; + +/* + * kernel VM-related data + */ +struct vm_table { + int numnodes; + unsigned long *node_online_map; + int node_online_map_len; + unsigned int mem_flags; +}; +extern struct vm_table vt; + +/* + * Loaded module symbols info. + */ +#define MOD_NAME_LEN 64 +#define IN_RANGE(addr, mbase, sz) \ + (((unsigned long)(addr) >= (unsigned long)mbase) \ + && ((unsigned long)addr < (unsigned long)(mbase + sz))) + +struct symbol_info { + char *name; + unsigned long long value; +}; + +struct module_info { + char name[MOD_NAME_LEN]; + unsigned int num_syms; + struct symbol_info *sym_info; +}; + + +struct symbol_table { + unsigned long long mem_map; + unsigned long long vmem_map; + unsigned long long mem_section; + unsigned long long pkmap_count; + unsigned long long pkmap_count_next; + unsigned long long system_utsname; + unsigned long long init_uts_ns; + unsigned long long _stext; + unsigned long long swapper_pg_dir; + unsigned long long init_level4_pgt; + unsigned long long level4_kernel_pgt; + unsigned long long init_top_pgt; + unsigned long long vmlist; + unsigned long long vmap_area_list; + unsigned long long phys_base; + unsigned long long node_online_map; + unsigned long long node_states; + unsigned long long node_memblk; + unsigned long long node_data; + unsigned long long pgdat_list; + unsigned long long contig_page_data; + unsigned long long log_buf; + unsigned long long log_buf_len; + unsigned long long log_end; + unsigned long long log_first_idx; + unsigned long long clear_idx; + unsigned long long log_next_idx; + unsigned long long max_pfn; + unsigned long long node_remap_start_vaddr; + 
unsigned long long node_remap_end_vaddr; + unsigned long long node_remap_start_pfn; + unsigned long long free_huge_page; + + /* + * for Xen extraction + */ + unsigned long long dom_xen; + unsigned long long dom_io; + unsigned long long domain_list; + unsigned long long frame_table; + unsigned long long xen_heap_start; + unsigned long long pgd_l2; + unsigned long long pgd_l3; + unsigned long long pgd_l4; + unsigned long long xenheap_phys_end; + unsigned long long xen_pstart; + unsigned long long frametable_pg_dir; + unsigned long long max_page; + unsigned long long alloc_bitmap; + + /* + * for loading module symbol data + */ + + unsigned long long modules; + + /* + * vmalloc_start address on s390x arch + */ + unsigned long long high_memory; + + /* + * for sadump + */ + unsigned long long linux_banner; + unsigned long long bios_cpu_apicid; + unsigned long long x86_bios_cpu_apicid; + unsigned long long x86_bios_cpu_apicid_early_ptr; + unsigned long long x86_bios_cpu_apicid_early_map; + unsigned long long crash_notes; + unsigned long long __per_cpu_offset; + unsigned long long __per_cpu_load; + unsigned long long cpu_online_mask; + unsigned long long __cpu_online_mask; + unsigned long long kexec_crash_image; + unsigned long long divide_error; + unsigned long long idt_table; + unsigned long long saved_command_line; + unsigned long long pti_init; + unsigned long long kaiser_init; + + /* + * symbols on ppc64 arch + */ + unsigned long long vmemmap_list; + unsigned long long mmu_vmemmap_psize; + unsigned long long mmu_psize_defs; + unsigned long long cpu_pgd; + unsigned long long demote_segment_4k; + unsigned long long cur_cpu_spec; + + /* + * symbols on sparc64 arch + */ + unsigned long long vmemmap_table; +}; + +struct size_table { + long page; + long mem_section; + long pglist_data; + long zone; + long free_area; + long list_head; + long node_memblk_s; + long nodemask_t; + long printk_log; + + /* + * for Xen extraction + */ + long page_info; + long domain; + + /* + * for 
loading module symbol data + */ + long module; + + /* + * for sadump + */ + long percpu_data; + long elf_prstatus; + long user_regs_struct; + long cpumask; + long cpumask_t; + long kexec_segment; + long elf64_hdr; + + /* + * symbols on ppc64 arch + */ + long vmemmap_backing; + long mmu_psize_def; + long cpu_spec; + + long pageflags; +}; + +struct offset_table { + struct page { + long flags; + long _refcount; + long mapping; + long lru; + long _mapcount; + long private; + long compound_dtor; + long compound_order; + long compound_head; + } page; + struct mem_section { + long section_mem_map; + } mem_section; + struct zone { + long free_pages; + long free_area; + long vm_stat; + long spanned_pages; + } zone; + struct pglist_data { + long node_zones; + long nr_zones; + long node_mem_map; + long node_start_pfn; + long node_spanned_pages; + long pgdat_next; + } pglist_data; + struct free_area { + long free_list; + } free_area; + struct list_head { + long next; + long prev; + } list_head; + struct node_memblk_s { + long start_paddr; + long size; + long nid; + } node_memblk_s; + struct vm_struct { + long addr; + } vm_struct; + struct vmap_area { + long va_start; + long list; + } vmap_area; + + /* + * for Xen extraction + */ + struct page_info { + long count_info; + long _domain; + } page_info; + struct domain { + long domain_id; + long next_in_list; + } domain; + + /* + * for loading module symbol data + */ + struct module { + long list; + long name; + long module_core; + long core_size; + long module_init; + long init_size; + long num_symtab; + long symtab; + long strtab; + } module; + + /* + * for loading elf_prstaus symbol data + */ + struct elf_prstatus_s { + long pr_reg; + } elf_prstatus; + + /* + * for loading user_regs_struct symbol data + */ + struct user_regs_struct_s { + long r15; + long r14; + long r13; + long r12; + long bp; + long bx; + long r11; + long r10; + long r9; + long r8; + long ax; + long cx; + long dx; + long si; + long di; + long orig_ax; + long 
ip; + long cs; + long flags; + long sp; + long ss; + long fs_base; + long gs_base; + long ds; + long es; + long fs; + long gs; + } user_regs_struct; + + struct kimage_s { + long segment; + } kimage; + + struct kexec_segment_s { + long mem; + } kexec_segment; + + struct elf64_hdr_s { + long e_phnum; + long e_phentsize; + long e_phoff; + } elf64_hdr; + + struct elf64_phdr_s { + long p_type; + long p_offset; + long p_paddr; + long p_memsz; + } elf64_phdr; + + struct printk_log_s { + long ts_nsec; + long len; + long text_len; + } printk_log; + + /* + * symbols on ppc64 arch + */ + struct mmu_psize_def_s { + long shift; + } mmu_psize_def; + + struct vmemmap_backing_s { + long phys; + long virt_addr; + long list; + } vmemmap_backing; + + struct cpu_spec_s { + long mmu_features; + } cpu_spec; +}; + +/* + * The number of array + */ +struct array_table { + /* + * Symbol + */ + long node_data; + long pgdat_list; + long mem_section; + long node_memblk; + long __per_cpu_offset; + long node_remap_start_pfn; + + /* + * Structure + */ + struct zone_at { + long free_area; + } zone; + struct free_area_at { + long free_list; + } free_area; + struct kimage_at { + long segment; + } kimage; +}; + +struct number_table { + long NR_FREE_PAGES; + long N_ONLINE; + long pgtable_l5_enabled; + + /* + * Page flags + */ + long PG_lru; + long PG_private; + long PG_head; + long PG_head_mask; + long PG_swapcache; + long PG_swapbacked; + long PG_buddy; + long PG_slab; + long PG_hwpoison; + + long PAGE_BUDDY_MAPCOUNT_VALUE; + long SECTION_SIZE_BITS; + long MAX_PHYSMEM_BITS; + long HUGETLB_PAGE_DTOR; + long phys_base; +#ifdef __aarch64__ + long VA_BITS; + unsigned long PHYS_OFFSET; + unsigned long kimage_voffset; +#endif +}; + +struct srcfile_table { + /* + * typedef + */ + char pud_t[LEN_SRCFILE]; +}; + +/* + * This structure records where the vmemmap page structures reside, and which + * pfn's are represented by those page structures. 
+ * The actual pages containing the page structures are 2MB pages, so their pfn's + * will all be multiples of 0x200. + * The page structures are 7 64-bit words in length (0x38) so they overlap the + * 2MB boundaries. Each page structure represents a 4k page. + * A 4k page is here defined to be represented on a 2MB page if its page structure + * 'ends' on that page (even if it began on the page before). + */ +struct vmap_pfns { + struct vmap_pfns *next; + struct vmap_pfns *prev; + /* + * These (start/end) are literal pfns of 2MB pages on which the page + * structures reside, not start and end+1. + */ + unsigned long vmap_pfn_start; + unsigned long vmap_pfn_end; + /* + * These (start/end) are literal pfns represented on these pages, not + * start and end+1. + * The starting page struct is at least partly on the first page; the + * ending page struct is entirely on the last page. + */ + unsigned long rep_pfn_start; + unsigned long rep_pfn_end; +}; + +/* for saving a list of pfns to a buffer, and then to a file if necessary */ +struct save_control { + int sc_fd; + char *sc_filename; + char *sc_buf; + long sc_buflen; /* length of buffer never changes */ + long sc_bufposition; /* offset of next slot for write, or next to be read */ + long sc_filelen; /* length of valid data written */ + long sc_fileposition; /* offset in file of next entry to be read */ +}; +/* one entry in the buffer and file */ +struct sc_entry { + unsigned long startpfn; + unsigned long numpfns; +}; + +extern struct symbol_table symbol_table; +extern struct size_table size_table; +extern struct offset_table offset_table; +extern struct array_table array_table; +extern struct number_table number_table; +extern struct srcfile_table srcfile_table; + +struct memory_range { + unsigned long long start, end; +}; + +#define CRASH_RESERVED_MEM_NR 8 +struct memory_range crash_reserved_mem[CRASH_RESERVED_MEM_NR]; +int crash_reserved_mem_nr; + +unsigned long read_vmcoreinfo_symbol(char *str_symbol); +int 
readmem(int type_addr, unsigned long long addr, void *bufptr, size_t size); +int get_str_osrelease_from_vmlinux(void); +int read_vmcoreinfo_xen(void); +int exclude_xen_user_domain(void); +mdf_pfn_t get_num_dumpable(void); +int __read_disk_dump_header(struct disk_dump_header *dh, char *filename); +int read_disk_dump_header(struct disk_dump_header *dh, char *filename); +int read_kdump_sub_header(struct kdump_sub_header *kh, char *filename); +void close_vmcoreinfo(void); +int close_files_for_creating_dumpfile(void); +int iomem_for_each_line(char *match, int (*callback)(void *data, int nr, + char *str, + unsigned long base, + unsigned long length), + void *data); +int is_bigendian(void); +int get_symbol_info(void); + +/* + * for Xen extraction + */ +struct domain_list { + unsigned long domain_addr; + unsigned int domain_id; + unsigned int pickled_id; +}; + +#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8) +#define MFNS_PER_FRAME (info->page_size / sizeof(unsigned long)) + +#ifdef __aarch64__ +unsigned long long kvtop_xen_arm64(unsigned long kvaddr); +#define kvtop_xen(X) kvtop_xen_arm64(X) +#endif /* aarch64 */ + +#ifdef __arm__ +#define kvtop_xen(X) FALSE +#define get_xen_basic_info_arch(X) FALSE +#define get_xen_info_arch(X) FALSE +#endif /* arm */ + +#ifdef __x86__ +#define HYPERVISOR_VIRT_START_PAE (0xF5800000UL) +#define HYPERVISOR_VIRT_START (0xFC000000UL) +#define HYPERVISOR_VIRT_END (0xFFFFFFFFUL) +#define DIRECTMAP_VIRT_START (0xFF000000UL) +#define DIRECTMAP_VIRT_END (0xFFC00000UL) +#define FRAMETABLE_VIRT_START (0xF6800000UL) + +#define is_xen_vaddr(x) \ + ((x) >= HYPERVISOR_VIRT_START_PAE && (x) < HYPERVISOR_VIRT_END) +#define is_direct(x) \ + ((x) >= DIRECTMAP_VIRT_START && (x) < DIRECTMAP_VIRT_END) + +unsigned long long kvtop_xen_x86(unsigned long kvaddr); +#define kvtop_xen(X) kvtop_xen_x86(X) + +int get_xen_basic_info_x86(void); +#define get_xen_basic_info_arch(X) get_xen_basic_info_x86(X) +int get_xen_info_x86(void); +#define get_xen_info_arch(X) 
get_xen_info_x86(X) + +#endif /* __x86__ */ + +#ifdef __x86_64__ + +/* The architectural limit for physical addresses is 52 bits. + * Mask off bits 52-62 (available for OS use) and bit 63 (NX). + */ +#define ENTRY_MASK (~0xfff0000000000fffULL) +#define MAX_X86_64_FRAMES (info->page_size / sizeof(unsigned long)) + +#define PAGE_OFFSET_XEN_DOM0 (0xffff880000000000) /* different from linux */ +#define HYPERVISOR_VIRT_START (0xffff800000000000) +#define HYPERVISOR_VIRT_END (0xffff880000000000) +#define DIRECTMAP_VIRT_START (0xffff830000000000) +#define DIRECTMAP_VIRT_END_V3 (0xffff840000000000) +#define DIRECTMAP_VIRT_END_V4 (0xffff880000000000) +#define DIRECTMAP_VIRT_END (info->directmap_virt_end) +#define XEN_VIRT_START (info->xen_virt_start) +#define XEN_VIRT_END (XEN_VIRT_START + (1UL << 30)) +#define FRAMETABLE_VIRT_START_V3 0xffff82f600000000 +#define FRAMETABLE_VIRT_START_V4_3 0xffff82e000000000 + +#define is_xen_vaddr(x) \ + ((x) >= HYPERVISOR_VIRT_START && (x) < HYPERVISOR_VIRT_END) +#define is_direct(x) \ + ((x) >= DIRECTMAP_VIRT_START && (x) < DIRECTMAP_VIRT_END) +#define is_xen_text(x) \ + ((x) >= XEN_VIRT_START && (x) < XEN_VIRT_END) + +unsigned long long kvtop_xen_x86_64(unsigned long kvaddr); +#define kvtop_xen(X) kvtop_xen_x86_64(X) + +int get_xen_basic_info_x86_64(void); +#define get_xen_basic_info_arch(X) get_xen_basic_info_x86_64(X) +int get_xen_info_x86_64(void); +#define get_xen_info_arch(X) get_xen_info_x86_64(X) + +#endif /* __x86_64__ */ + +#ifdef __ia64__ +#define HYPERVISOR_VIRT_START (0xe800000000000000) +#define HYPERVISOR_VIRT_END (0xf800000000000000) +#define DEFAULT_SHAREDINFO_ADDR (0xf100000000000000) +#define PERCPU_PAGE_SIZE 65536 +#define PERCPU_ADDR (DEFAULT_SHAREDINFO_ADDR - PERCPU_PAGE_SIZE) +#define DIRECTMAP_VIRT_START (0xf000000000000000) +#define DIRECTMAP_VIRT_END PERCPU_ADDR +#define VIRT_FRAME_TABLE_ADDR (0xf300000000000000) +#define VIRT_FRAME_TABLE_END (0xf400000000000000) + +#define is_xen_vaddr(x) \ + ((x) >= 
HYPERVISOR_VIRT_START && (x) < HYPERVISOR_VIRT_END) +#define is_direct(x) \ + ((x) >= DIRECTMAP_VIRT_START && (x) < DIRECTMAP_VIRT_END) +#define is_frame_table_vaddr(x) \ + ((x) >= VIRT_FRAME_TABLE_ADDR && (x) < VIRT_FRAME_TABLE_END) + +#define PGDIR_SHIFT (PAGESHIFT() + 2 * (PAGESHIFT() - 3)) +#define PTRS_PER_PGD (1UL << (PAGESHIFT() - 3)) +#define PTRS_PER_PMD (1UL << (PAGESHIFT() - 3)) +#define PTRS_PER_PTE (1UL << (PAGESHIFT() - 3)) + +#define IA64_MAX_PHYS_BITS 50 +#define _PAGE_P (1) +#define _PFN_MASK (((1UL << IA64_MAX_PHYS_BITS) - 1) & ~0xfffUL) + +unsigned long long kvtop_xen_ia64(unsigned long kvaddr); +#define kvtop_xen(X) kvtop_xen_ia64(X) + +int get_xen_basic_info_ia64(void); +#define get_xen_basic_info_arch(X) get_xen_basic_info_ia64(X) +int get_xen_info_ia64(void); +#define get_xen_info_arch(X) get_xen_info_ia64(X) + +#endif /* __ia64 */ + +#if defined(__powerpc64__) || defined(__powerpc32__) /* powerpcXX */ +#define kvtop_xen(X) FALSE +#define get_xen_basic_info_arch(X) FALSE +#define get_xen_info_arch(X) FALSE +#endif /* powerpcXX */ + +#ifdef __s390x__ /* s390x */ +#define kvtop_xen(X) FALSE +#define get_xen_basic_info_arch(X) FALSE +#define get_xen_info_arch(X) FALSE +#endif /* s390x */ + +#ifdef __sparc64__ /* sparc64 */ +#define kvtop_xen(X) FALSE +#define get_xen_basic_info_arch(X) FALSE +#define get_xen_info_arch(X) FALSE +#endif /* sparc64 */ + +struct cycle { + mdf_pfn_t start_pfn; + mdf_pfn_t end_pfn; + + /* for excluding multi-page regions */ + mdf_pfn_t exclude_pfn_start; + mdf_pfn_t exclude_pfn_end; + mdf_pfn_t *exclude_pfn_counter; +}; + +static inline int +is_on(char *bitmap, mdf_pfn_t i) +{ + return bitmap[i>>3] & (1 << (i & 7)); +} + +static inline int +is_cyclic_region(mdf_pfn_t pfn, struct cycle *cycle) +{ + if (pfn < cycle->start_pfn || cycle->end_pfn <= pfn) + return FALSE; + else + return TRUE; +} + +static inline int +is_dumpable_buffer(struct dump_bitmap *bitmap, mdf_pfn_t pfn, struct cycle *cycle) +{ + if 
(!is_cyclic_region(pfn, cycle)) + return FALSE; + else + return is_on(bitmap->buf, pfn - cycle->start_pfn); +} + +static inline int +is_dumpable_file(struct dump_bitmap *bitmap, mdf_pfn_t pfn) +{ + off_t offset; + ssize_t rcode; + if (pfn == 0 || bitmap->no_block != pfn/PFN_BUFBITMAP) { + offset = bitmap->offset + BUFSIZE_BITMAP*(pfn/PFN_BUFBITMAP); + if (lseek(bitmap->fd, offset, SEEK_SET) < 0 ) { + ERRMSG("Can't seek the bitmap(%s). %s\n", + bitmap->file_name, strerror(errno)); + return FALSE; + } + + rcode = read(bitmap->fd, bitmap->buf, BUFSIZE_BITMAP); + if (rcode != BUFSIZE_BITMAP) + ERRMSG("Can't read the bitmap(%s). %s\n", + bitmap->file_name, strerror(errno)); + if (pfn == 0) + bitmap->no_block = 0; + else + bitmap->no_block = pfn/PFN_BUFBITMAP; + } + return is_on(bitmap->buf, pfn%PFN_BUFBITMAP); +} + +static inline int +is_dumpable(struct dump_bitmap *bitmap, mdf_pfn_t pfn, struct cycle *cycle) +{ + if (bitmap->fd < 0) { + return is_dumpable_buffer(bitmap, pfn, cycle); + } else { + return is_dumpable_file(bitmap, pfn); + } +} + +static inline int +is_zero_page(unsigned char *buf, long page_size) +{ + size_t i; + unsigned long long *vect = (unsigned long long *) buf; + long page_len = page_size / sizeof(unsigned long long); + + for (i = 0; i < page_len; i++) + if (vect[i]) + return FALSE; + return TRUE; +} + +void write_vmcoreinfo_data(void); +int set_bit_on_1st_bitmap(mdf_pfn_t pfn, struct cycle *cycle); +int clear_bit_on_1st_bitmap(mdf_pfn_t pfn, struct cycle *cycle); + +#ifdef __x86__ + +struct user_regs_struct { + unsigned long bx; + unsigned long cx; + unsigned long dx; + unsigned long si; + unsigned long di; + unsigned long bp; + unsigned long ax; + unsigned long ds; + unsigned long es; + unsigned long fs; + unsigned long gs; + unsigned long orig_ax; + unsigned long ip; + unsigned long cs; + unsigned long flags; + unsigned long sp; + unsigned long ss; +}; + +struct elf_prstatus { + char pad1[72]; + struct user_regs_struct pr_reg; + char pad2[4]; +}; 
+ +#endif + +#ifdef __x86_64__ + +struct user_regs_struct { + unsigned long r15; + unsigned long r14; + unsigned long r13; + unsigned long r12; + unsigned long bp; + unsigned long bx; + unsigned long r11; + unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long ax; + unsigned long cx; + unsigned long dx; + unsigned long si; + unsigned long di; + unsigned long orig_ax; + unsigned long ip; + unsigned long cs; + unsigned long flags; + unsigned long sp; + unsigned long ss; + unsigned long fs_base; + unsigned long gs_base; + unsigned long ds; + unsigned long es; + unsigned long fs; + unsigned long gs; +}; + +struct elf_prstatus { + char pad1[112]; + struct user_regs_struct pr_reg; + char pad2[4]; +}; + +#endif + +/* + * Below are options which getopt_long can recognize. From OPT_START options are + * non-printable, just used for implementation. + */ +#define OPT_BLOCK_ORDER 'b' +#define OPT_COMPRESS_ZLIB 'c' +#define OPT_DEBUG 'D' +#define OPT_DUMP_LEVEL 'd' +#define OPT_ELF_DUMPFILE 'E' +#define OPT_EXCLUDE_UNUSED_VM 'e' +#define OPT_FLATTEN 'F' +#define OPT_FORCE 'f' +#define OPT_GENERATE_VMCOREINFO 'g' +#define OPT_HELP 'h' +#define OPT_READ_VMCOREINFO 'i' +#define OPT_COMPRESS_LZO 'l' +#define OPT_COMPRESS_SNAPPY 'p' +#define OPT_REARRANGE 'R' +#define OPT_VERSION 'v' +#define OPT_EXCLUDE_XEN_DOM 'X' +#define OPT_VMLINUX 'x' +#define OPT_START 256 +#define OPT_SPLIT OPT_START+0 +#define OPT_REASSEMBLE OPT_START+1 +#define OPT_XEN_SYMS OPT_START+2 +#define OPT_XEN_VMCOREINFO OPT_START+3 +#define OPT_XEN_PHYS_START OPT_START+4 +#define OPT_MESSAGE_LEVEL OPT_START+5 +#define OPT_VTOP OPT_START+6 +#define OPT_DUMP_DMESG OPT_START+7 +#define OPT_CONFIG OPT_START+8 +#define OPT_DISKSET OPT_START+9 +#define OPT_CYCLIC_BUFFER OPT_START+10 +#define OPT_EPPIC OPT_START+11 +#define OPT_NON_MMAP OPT_START+12 +#define OPT_MEM_USAGE OPT_START+13 +#define OPT_SPLITBLOCK_SIZE OPT_START+14 +#define OPT_WORKING_DIR OPT_START+15 +#define OPT_NUM_THREADS 
OPT_START+16 +#define OPT_PARTIAL_DMESG OPT_START+17 + +/* + * Function Prototype. + */ +mdf_pfn_t get_num_dumpable_cyclic(void); +mdf_pfn_t get_num_dumpable_cyclic_withsplit(void); +int get_loads_dumpfile_cyclic(void); +int initial_xen(void); +unsigned long long ptom_xen(unsigned long long paddr); +unsigned long long get_free_memory_size(void); +int calculate_cyclic_buffer_size(void); +int prepare_splitblock_table(void); +int initialize_zlib(z_stream *stream, int level); +int finalize_zlib(z_stream *stream); + +int parse_line(char *str, char *argv[]); +char *shift_string_left(char *s, int cnt); +char *clean_line(char *line); +char *strip_linefeeds(char *line); +char *strip_beginning_whitespace(char *line); +char *strip_ending_whitespace(char *line); +ulong htol(char *s, int flags); +int hexadecimal(char *s, int count); +int decimal(char *s, int count); +int file_exists(char *file); + +#endif /* MAKEDUMPFILE_H */ diff --git a/makedumpfile.spec b/makedumpfile.spec new file mode 100644 index 0000000..88bb555 --- /dev/null +++ b/makedumpfile.spec @@ -0,0 +1,50 @@ +Name: makedumpfile +Summary: makedumpfile package +Version: 1.6.4 +Release: 1 +Group: Applications/Text +License: GPL +Source: %{name}-%{version}.tar.gz +BuildRoot: %{_tmppath}/%{name}-root +Distribution: Linux 2.6 or greater +Packager: Atsushi Kumagai <ats-kumagai@wm.jp.nec.com> +ExclusiveOS: Linux +ExclusiveArch: i386 ia64 ppc ppc64 ppc64pseries ppc64iseries x86_64 +Buildroot: %{_tmppath}/%{name}-root +BuildRequires: zlib-devel elfutils-devel-static + +%description +makedumpfile package. 
+ +%prep +%setup + +%build +make + +%install +rm -rf %{buildroot} +mkdir -p %{buildroot}/usr/sbin +mkdir -p %{buildroot}/etc +mkdir -p %{buildroot}/usr/share/man/man5 +mkdir -p %{buildroot}/usr/share/man/man8 +mkdir -p %{buildroot}/usr/share/%{name}-%{version}/eppic-scripts/ +make install DESTDIR=%{buildroot} + +%clean +rm -rf %{buildroot} + +%files +/etc/makedumpfile.conf.sample +/usr/sbin/makedumpfile +/usr/sbin/makedumpfile-R.pl +/usr/share/man/man5/makedumpfile.conf.5.gz +/usr/share/man/man8/makedumpfile.8.gz +/usr/share/%{name}-%{version}/eppic_scripts/ + +%changelog +* Wed Dec 18 2013 Atsushi Kumagai <kumagai-atsushi@mxc.nes.nec.co.jp> +- reflect the changing of installation directory. + +* Fri Aug 21 2008 Ken'ichi Ohmichi <oomichi@mxs.nes.nec.co.jp> +- initial release. diff --git a/print_info.c b/print_info.c new file mode 100644 index 0000000..6bfcd11 --- /dev/null +++ b/print_info.c @@ -0,0 +1,422 @@ +/* + * print_info.c + * + * Copyright (C) 2011 NEC Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include "print_info.h" +#include <time.h> +#include <string.h> + +#define PROGRESS_MAXLEN "50" + +int message_level; +int flag_strerr_message; +int flag_ignore_r_char; /* 0: '\r' is effective. 1: not effective. 
*/ + +void +show_version(void) +{ + MSG("makedumpfile: version " VERSION " (released on " RELEASE_DATE ")\n"); +#ifdef USELZO + MSG("lzo\tenabled\n"); +#else + MSG("lzo\tdisabled\n"); +#endif +#ifdef USESNAPPY + MSG("snappy\tenabled\n"); +#else + MSG("snappy\tdisabled\n"); +#endif + MSG("\n"); +} + +void +print_usage(void) +{ + MSG("\n"); + MSG("LZO support:\n"); +#ifdef USELZO + MSG(" enabled\n"); +#else + MSG(" disabled ('-l' option will be ignored.)\n"); +#endif + MSG("snappy support:\n"); +#ifdef USESNAPPY + MSG(" enabled\n"); +#else + MSG(" disabled ('-p' option will be ignored.)\n"); +#endif + MSG("\n"); + MSG("Usage:\n"); + MSG(" Creating DUMPFILE:\n"); + MSG(" # makedumpfile [-c|-l|-p|-E] [-d DL] [-e] [-x VMLINUX|-i VMCOREINFO] VMCORE\n"); + MSG(" DUMPFILE\n"); + MSG("\n"); + MSG(" Creating DUMPFILE with filtered kernel data specified through filter config\n"); + MSG(" file or eppic macro:\n"); + MSG(" # makedumpfile [-c|-l|-p|-E] [-d DL] -x VMLINUX [--config FILTERCONFIGFILE]\n"); + MSG(" [--eppic EPPICMACRO] VMCORE DUMPFILE\n"); + MSG("\n"); + MSG(" Outputting the dump data in the flattened format to the standard output:\n"); + MSG(" # makedumpfile -F [-c|-l|-p|-E] [-d DL] [-x VMLINUX|-i VMCOREINFO] VMCORE\n"); + MSG("\n"); + MSG(" Rearranging the dump data in the flattened format to a readable DUMPFILE:\n"); + MSG(" # makedumpfile -R DUMPFILE\n"); + MSG("\n"); + MSG(" Split the dump data to multiple DUMPFILEs in parallel:\n"); + MSG(" # makedumpfile --split [OPTION] [-x VMLINUX|-i VMCOREINFO] VMCORE DUMPFILE1\n"); + MSG(" DUMPFILE2 [DUMPFILE3 ..]\n"); + MSG("\n"); + MSG(" Using multiple threads to create DUMPFILE in parallel:\n"); + MSG(" # makedumpfile [OPTION] [-x VMLINUX|-i VMCOREINFO] --num-threads THREADNUM\n"); + MSG(" VMCORE DUMPFILE1\n"); + MSG("\n"); + MSG(" Reassemble multiple DUMPFILEs:\n"); + MSG(" # makedumpfile --reassemble DUMPFILE1 DUMPFILE2 [DUMPFILE3 ..] 
DUMPFILE\n"); + MSG("\n"); + MSG(" Generating VMCOREINFO:\n"); + MSG(" # makedumpfile -g VMCOREINFO -x VMLINUX\n"); + MSG("\n"); + MSG(" Extracting the dmesg log from a VMCORE:\n"); + MSG(" # makedumpfile --dump-dmesg [-x VMLINUX|-i VMCOREINFO] VMCORE LOGFILE\n"); + MSG("\n"); + MSG("\n"); + MSG(" Creating DUMPFILE of Xen:\n"); + MSG(" # makedumpfile [-c|-l|-p|-E] [--xen-syms XEN-SYMS|--xen-vmcoreinfo VMCOREINFO]\n"); + MSG(" VMCORE DUMPFILE\n"); + MSG("\n"); + MSG(" Filtering domain-0 of Xen:\n"); + MSG(" # makedumpfile [-c|-l|-p|-E] -d DL -x vmlinux VMCORE DUMPFILE\n"); + MSG("\n"); + MSG(" Generating VMCOREINFO of Xen:\n"); + MSG(" # makedumpfile -g VMCOREINFO --xen-syms XEN-SYMS\n"); + MSG("\n"); + MSG("\n"); + MSG(" Creating DUMPFILE from multiple VMCOREs generated on sadump diskset configuration:\n"); + MSG(" # makedumpfile [-c|-l|-p] [-d DL] -x VMLINUX --diskset=VMCORE1 --diskset=VMCORE2\n"); + MSG(" [--diskset=VMCORE3 ..] DUMPFILE\n"); + MSG("\n"); + MSG("\n"); + MSG("Available options:\n"); + MSG(" [-c|-l|-p]:\n"); + MSG(" Compress dump data by each page using zlib for -c option, lzo for -l option\n"); + MSG(" or snappy for -p option. A user cannot specify either of these options with\n"); + MSG(" -E option, because the ELF format does not support compressed data.\n"); + MSG(" THIS IS ONLY FOR THE CRASH UTILITY.\n"); + MSG("\n"); + MSG(" [-e]:\n"); + MSG(" Exclude the page structures (vmemmap) which represent excluded pages.\n"); + MSG(" This greatly shortens the dump of a very large memory system.\n"); + MSG(" The --work-dir option must also be specified, as it will be used\n"); + MSG(" to hold bitmaps and a file of page numbers that are to be excluded.\n"); + MSG(" The -e option will cause a noncyclic dump procedure.\n"); + MSG(" This option is only for x86_64.\n"); + MSG("\n"); + MSG(" [-d DL]:\n"); + MSG(" Specify the type of unnecessary page for analysis.\n"); + MSG(" Pages of the specified type are not copied to DUMPFILE. 
The page type\n"); + MSG(" marked in the following table is excluded. A user can specify multiple\n"); + MSG(" page types by setting the sum of each page type for Dump_Level (DL).\n"); + MSG(" The maximum of Dump_Level is 31.\n"); + MSG(" Note that Dump_Level for Xen dump filtering is 0 or 1 except on x86_64\n"); + MSG("\n"); + MSG(" | non-\n"); + MSG(" Dump | zero private private user free\n"); + MSG(" Level | page cache cache data page\n"); + MSG(" -------+---------------------------------------\n"); + MSG(" 0 |\n"); + MSG(" 1 | X\n"); + MSG(" 2 | X\n"); + MSG(" 4 | X X\n"); + MSG(" 8 | X\n"); + MSG(" 16 | X\n"); + MSG(" 31 | X X X X X\n"); + MSG("\n"); + MSG(" [-E]:\n"); + MSG(" Create DUMPFILE in the ELF format.\n"); + MSG(" This option cannot be specified with the -c, -l or -p options,\n"); + MSG(" because the ELF format does not support compressed data.\n"); + MSG("\n"); + MSG(" [-x VMLINUX]:\n"); + MSG(" Specify the first kernel's VMLINUX to analyze the first kernel's\n"); + MSG(" memory usage.\n"); + MSG(" The page size of the first kernel and the second kernel should match.\n"); + MSG("\n"); + MSG(" [-i VMCOREINFO]:\n"); + MSG(" Specify VMCOREINFO instead of VMLINUX for analyzing the first kernel's\n"); + MSG(" memory usage.\n"); + MSG(" VMCOREINFO should be made beforehand by makedumpfile with -g option,\n"); + MSG(" and it contains the first kernel's information. This option is necessary\n"); + MSG(" if VMCORE does not contain VMCOREINFO, [-x VMLINUX] is not specified,\n"); + MSG(" and dump_level is 2 or more.\n"); + MSG("\n"); + MSG(" [-g VMCOREINFO]:\n"); + MSG(" Generate VMCOREINFO from the first kernel's VMLINUX.\n"); + MSG(" VMCOREINFO must be generated on the system that is running the first\n"); + MSG(" kernel. With -i option, a user can specify VMCOREINFO generated on the\n"); + MSG(" other system that is running the same first kernel. 
[-x VMLINUX] must\n"); + MSG(" be specified.\n"); + MSG("\n"); + MSG(" [--config FILTERCONFIGFILE]:\n"); + MSG(" Used in conjunction with -x VMLINUX option, to specify the filter config\n"); + MSG(" file that contains filter commands to filter out desired kernel data\n"); + MSG(" from vmcore while creating DUMPFILE.\n"); + MSG("\n"); + MSG(" [--eppic EPPICMACRO]:\n"); + MSG(" Used in conjunction with -x VMLINUX option, to specify the eppic macro\n"); + MSG(" file that contains filter rules or directory that contains eppic macro\n"); + MSG(" files to filter out desired kernel data from vmcore while creating DUMPFILE.\n"); + MSG(" When directory is specified, all the eppic macros in the directory are\n"); + MSG(" processed\n"); + MSG("\n"); + MSG(" [-F]:\n"); + MSG(" Output the dump data in the flattened format to the standard output\n"); + MSG(" for transporting the dump data by SSH.\n"); + MSG(" Analysis tools cannot read the flattened format directly. For analysis,\n"); + MSG(" the dump data in the flattened format should be rearranged to a readable\n"); + MSG(" DUMPFILE by -R option.\n"); + MSG("\n"); + MSG(" [-R]:\n"); + MSG(" Rearrange the dump data in the flattened format from the standard input\n"); + MSG(" to a readable DUMPFILE.\n"); + MSG("\n"); + MSG(" [--split]:\n"); + MSG(" Split the dump data to multiple DUMPFILEs in parallel. If specifying\n"); + MSG(" DUMPFILEs on different storage devices, a device can share I/O load with\n"); + MSG(" other devices and it reduces time for saving the dump data. 
The file size\n"); + MSG(" of each DUMPFILE is smaller than the system memory size which is divided\n"); + MSG(" by the number of DUMPFILEs.\n"); + MSG(" This feature supports only the kdump-compressed format.\n"); + MSG("\n"); + MSG(" [--num-threads THREADNUM]:\n"); + MSG(" Using multiple threads to read and compress data of each page in parallel.\n"); + MSG(" And it will reduces time for saving DUMPFILE.\n"); + MSG(" Note that if the usable cpu number is less than the thread number, it may\n"); + MSG(" lead to great performance degradation.\n"); + MSG(" This feature only supports creating DUMPFILE in kdump-compressed format from\n"); + MSG(" VMCORE in kdump-compressed format or elf format.\n"); + MSG("\n"); + MSG(" [--reassemble]:\n"); + MSG(" Reassemble multiple DUMPFILEs, which are created by --split option,\n"); + MSG(" into one DUMPFILE. dumpfile1 and dumpfile2 are reassembled into dumpfile.\n"); + MSG("\n"); + MSG(" [-b <order>]\n"); + MSG(" Specify the cache 2^order pages in ram when generating DUMPFILE before\n"); + MSG(" writing to output. 
The default value is 4.\n"); + MSG("\n"); + MSG(" [--cyclic-buffer BUFFER_SIZE]:\n"); + MSG(" Specify the buffer size in kilo bytes for bitmap data.\n"); + MSG(" Filtering processing will be divided into multi cycles to fix the memory\n"); + MSG(" consumption, the number of cycles is represented as:\n"); + MSG("\n"); + MSG(" num_of_cycles = system_memory / \n"); + MSG(" (BUFFER_SIZE * 1024 * bit_per_bytes * page_size)\n"); + MSG("\n"); + MSG(" The lesser number of cycles, the faster working speed is expected.\n"); + MSG(" By default, BUFFER_SIZE will be calculated automatically depending on\n"); + MSG(" system memory size, so ordinary users don't need to specify this option.\n"); + MSG("\n"); + MSG(" [--splitblock-size SPLITBLOCK_SIZE]:\n"); + MSG(" Specify the splitblock size in kilo bytes for analysis with --split.\n"); + MSG(" If --splitblock N is specified, difference of each splitted dumpfile\n"); + MSG(" size is at most N kilo bytes.\n"); + MSG("\n"); + MSG(" [--work-dir]:\n"); + MSG(" Specify the working directory for the temporary bitmap file.\n"); + MSG(" If this option isn't specified, the bitmap will be saved on memory.\n"); + MSG(" Filtering processing has to do 2 pass scanning to fix the memory consumption,\n"); + MSG(" but it can be avoided by using working directory on file system.\n"); + MSG(" So if you specify this option, the filtering speed may be bit faster.\n"); + MSG("\n"); + MSG(" [--non-mmap]:\n"); + MSG(" Never use mmap(2) to read VMCORE even if it supports mmap(2).\n"); + MSG(" Generally, reading VMCORE with mmap(2) is faster than without it,\n"); + MSG(" so ordinary users don't need to specify this option.\n"); + MSG(" This option is mainly for debugging.\n"); + MSG("\n"); + MSG(" [--xen-syms XEN-SYMS]:\n"); + MSG(" Specify the XEN-SYMS to analyze Xen's memory usage.\n"); + MSG("\n"); + MSG(" [--xen-vmcoreinfo VMCOREINFO]:\n"); + MSG(" Specify the VMCOREINFO of Xen to analyze Xen's memory usage.\n"); + MSG("\n"); + MSG(" [--xen_phys_start 
XEN_PHYS_START_ADDRESS]:\n"); + MSG(" This option is only for x86_64.\n"); + MSG(" Specify the XEN_PHYS_START_ADDRESS, if the xen code/data is relocatable\n"); + MSG(" and VMCORE does not contain XEN_PHYS_START_ADDRESS in the CRASHINFO.\n"); + MSG("\n"); + MSG(" [-X]:\n"); + MSG(" Exclude all the user domain pages from Xen kdump's VMCORE, and extract\n"); + MSG(" the part of Xen and domain-0.\n"); + MSG("\n"); + MSG(" [--diskset=VMCORE]:\n"); + MSG(" Specify multiple VMCOREs created on sadump diskset configuration the same\n"); + MSG(" number of times as the number of VMCOREs in increasing order from left to\n"); + MSG(" right.\n"); + MSG("\n"); + MSG(" [--message-level ML]:\n"); + MSG(" Specify the message types.\n"); + MSG(" Users can restrict output printed by specifying Message_Level (ML) with\n"); + MSG(" this option. The message type marked with an X in the following table is\n"); + MSG(" printed. For example, according to the table, specifying 7 as ML means\n"); + MSG(" progress indicator, common message, and error message are printed, and\n"); + MSG(" this is a default value.\n"); + MSG(" Note that the maximum value of message_level is 31.\n"); + MSG("\n"); + MSG(" Message | progress common error debug report\n"); + MSG(" Level | indicator message message message message\n"); + MSG(" ---------+------------------------------------------------------\n"); + MSG(" 0 |\n"); + MSG(" 1 | X\n"); + MSG(" 2 | X\n"); + MSG(" 4 | X\n"); + MSG(" * 7 | X X X\n"); + MSG(" 8 | X\n"); + MSG(" 16 | X\n"); + MSG(" 31 | X X X X X\n"); + MSG("\n"); + MSG(" [--vtop VIRTUAL_ADDRESS]:\n"); + MSG(" This option is useful, when user debugs the translation problem\n"); + MSG(" of virtual address. If specifying the VIRTUAL_ADDRESS, its physical\n"); + MSG(" address is printed.\n"); + MSG("\n"); + MSG(" [--dump-dmesg]:\n"); + MSG(" This option overrides the normal behavior of makedumpfile. 
Instead of\n"); + MSG(" compressing and filtering a VMCORE to make it smaller, it simply\n"); + MSG(" extracts the dmesg log from a VMCORE and writes it to the specified\n"); + MSG(" LOGFILE. If a VMCORE does not contain VMCOREINFO for dmesg, it is\n"); + MSG(" necessary to specify [-x VMLINUX] or [-i VMCOREINFO].\n"); + MSG("\n"); + MSG(" [--mem-usage]:\n"); + MSG(" This option is only for x86_64.\n"); + MSG(" This option is used to show the page numbers of current system in different\n"); + MSG(" use. It should be executed in 1st kernel. By the help of this, user can know\n"); + MSG(" how many pages is dumpable when different dump_level is specified. It analyzes\n"); + MSG(" the 'System Ram' and 'kernel text' program segment of /proc/kcore excluding\n"); + MSG(" the crashkernel range, then calculates the page number of different kind per\n"); + MSG(" vmcoreinfo. So currently /proc/kcore need be specified explicitly.\n"); + MSG("\n"); + MSG(" [-D]:\n"); + MSG(" Print debugging message.\n"); + MSG("\n"); + MSG(" [-f]:\n"); + MSG(" Overwrite DUMPFILE even if it already exists.\n"); + MSG(" Force mem-usage to work with older kernel as well.\n"); + MSG("\n"); + MSG(" [-h, --help]:\n"); + MSG(" Show help message and LZO/snappy support status (enabled/disabled).\n"); + MSG("\n"); + MSG(" [-v]:\n"); + MSG(" Show the version of makedumpfile.\n"); + MSG("\n"); + MSG(" VMLINUX:\n"); + MSG(" This is a pathname to the first kernel's vmlinux.\n"); + MSG(" This file must have the debug information of the first kernel to analyze\n"); + MSG(" the first kernel's memory usage.\n"); + MSG("\n"); + MSG(" VMCORE:\n"); + MSG(" This is a pathname to the first kernel's memory core image.\n"); + MSG(" This argument is generally /proc/vmcore.\n"); + MSG("\n"); + MSG(" DUMPFILE:\n"); + MSG(" This is a pathname to a file created by this command.\n"); + MSG("\n"); + MSG(" XEN-SYMS:\n"); + MSG(" This is a pathname to the xen-syms.\n"); + MSG(" This file must have the debug information of Xen 
to analyze\n"); + MSG(" Xen's memory usage.\n"); + MSG("\n"); +} + +static void calc_delta(struct timeval *tv_start, struct timeval *delta) +{ + struct timeval tv_end; + + gettimeofday(&tv_end, NULL); + delta->tv_sec = tv_end.tv_sec - tv_start->tv_sec; + delta->tv_usec = tv_end.tv_usec - tv_start->tv_usec; + if (delta->tv_usec < 0) { + delta->tv_sec--; + delta->tv_usec += 1000000; + } +} + +/* produce less than 12 bytes on msg */ +static int eta_to_human_short (unsigned long secs, char* msg) +{ + strcpy(msg, "eta: "); + msg += strlen("eta: "); + if (secs < 100) + sprintf(msg, "%lus", secs); + else if (secs < 100 * 60) + sprintf(msg, "%lum%lus", secs / 60, secs % 60); + else if (secs < 48 * 3600) + sprintf(msg, "%luh%lum", secs / 3600, (secs / 60) % 60); + else if (secs < 100 * 86400) + sprintf(msg, "%lud%luh", secs / 86400, (secs / 3600) % 24); + else + sprintf(msg, ">2day"); + return 0; +} + + +void +print_progress(const char *msg, unsigned long current, unsigned long end, struct timeval *start) +{ + unsigned progress; /* in promilles (tenths of a percent) */ + time_t tm; + static time_t last_time = 0; + static unsigned int lapse = 0; + static const char *spinner = "/|\\-"; + struct timeval delta; + unsigned long eta; + char eta_msg[16] = " "; + + if (current < end) { + tm = time(NULL); + if (tm - last_time < 1) + return; + last_time = tm; + progress = current * 1000 / end; + } else + progress = 1000; + + if (start != NULL && progress != 0) { + calc_delta(start, &delta); + eta = 1000 * delta.tv_sec + delta.tv_usec / 1000; + eta = eta / progress - delta.tv_sec; + eta_to_human_short(eta, eta_msg); + } + if (flag_ignore_r_char) { + PROGRESS_MSG("%-" PROGRESS_MAXLEN "s: [%3u.%u %%] %c %16s\n", + msg, progress / 10, progress % 10, + spinner[lapse % 4], eta_msg); + } else { + PROGRESS_MSG("\r"); + PROGRESS_MSG("%-" PROGRESS_MAXLEN "s: [%3u.%u %%] %c %16s", + msg, progress / 10, progress % 10, + spinner[lapse % 4], eta_msg); + } + lapse++; +} + +void 
+print_execution_time(char *step_name, struct timeval *tv_start) +{ + struct timeval delta; + + calc_delta(tv_start, &delta); + REPORT_MSG("STEP [%s] : %ld.%06ld seconds\n", + step_name, delta.tv_sec, delta.tv_usec); +} + diff --git a/print_info.h b/print_info.h new file mode 100644 index 0000000..1ce3593 --- /dev/null +++ b/print_info.h @@ -0,0 +1,102 @@ +/* + * print_info.h + * + * Copyright (C) 2011 NEC Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#ifndef _PRINT_INFO_H +#define _PRINT_INFO_H + +#include <stdio.h> +#include <sys/time.h> + +extern int message_level; +extern int flag_strerr_message; +extern int flag_ignore_r_char; + +void show_version(void); +void print_usage(void); +void print_progress(const char *msg, unsigned long current, unsigned long end, struct timeval *start); + +void print_execution_time(char *step_name, struct timeval *tv_start); + +/* + * Message texts + */ +#define PROGRESS_COPY "Copying data " +#define PROGRESS_HOLES "Checking for memory holes " +#define PROGRESS_UNN_PAGES "Excluding unnecessary pages" +#define PROGRESS_FREE_PAGES "Excluding free pages " +#define PROGRESS_ZERO_PAGES "Excluding zero pages " +#define PROGRESS_XEN_DOMAIN "Excluding xen user domain " + +/* + * Message Level + */ +#define MIN_MSG_LEVEL (0) +#define MAX_MSG_LEVEL (31) +#define DEFAULT_MSG_LEVEL (7) /* Print the progress indicator, the + common message, the error message */ +#define ML_PRINT_PROGRESS (0x001) /* Print the progress indicator */ +#define ML_PRINT_COMMON_MSG (0x002) /* 
Print the common message */ +#define ML_PRINT_ERROR_MSG (0x004) /* Print the error message */ +#define ML_PRINT_DEBUG_MSG (0x008) /* Print the debugging message */ +#define ML_PRINT_REPORT_MSG (0x010) /* Print the report message */ + +#define MSG(x...) \ +do { \ + if (message_level & ML_PRINT_COMMON_MSG) { \ + if (flag_strerr_message) \ + fprintf(stderr, x); \ + else \ + fprintf(stdout, x); \ + } \ +} while (0) + +#define ERRMSG(x...) \ +do { \ + if (message_level & ML_PRINT_ERROR_MSG) { \ + fprintf(stderr, __FUNCTION__); \ + fprintf(stderr, ": "); \ + fprintf(stderr, x); \ + } \ +} while (0) + +#define PROGRESS_MSG(x...) \ +do { \ + if (message_level & ML_PRINT_PROGRESS) { \ + fprintf(stderr, x); \ + } \ +} while (0) + +#define DEBUG_MSG(x...) \ +do { \ + if (message_level & ML_PRINT_DEBUG_MSG) { \ + if (flag_strerr_message) \ + fprintf(stderr, x); \ + else \ + fprintf(stdout, x); \ + } \ +} while (0) + +#define REPORT_MSG(x...) \ +do { \ + if (message_level & ML_PRINT_REPORT_MSG) { \ + if (flag_strerr_message) \ + fprintf(stderr, x); \ + else \ + fprintf(stdout, x); \ + } \ +} while (0) + +#endif /* PRINT_INFO_H */ + diff --git a/sadump_info.c b/sadump_info.c new file mode 100644 index 0000000..dd50d48 --- /dev/null +++ b/sadump_info.c @@ -0,0 +1,2498 @@ +/* + * sadump_info.c + * + * Created by: HATAYAMA, Daisuke <d.hatayama@jp.fujitsu.com> + * + * Copyright (C) 2011 FUJITSU LIMITED + * Copyright (C) 2011 NEC Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ +#if defined(__x86__) || defined(__x86_64__) + +#include "makedumpfile.h" +#include "elf_info.h" +#include "print_info.h" +#include "sadump_mod.h" + +#include <arpa/inet.h> /* htonl, htons */ + +#define SADUMP_EFI_GUID_TEXT_REPR_LEN 36 + +#ifdef __x86__ + +#define KEXEC_NOTE_HEAD_BYTES roundup(sizeof(Elf32_Nhdr), 4) + +#endif + +#ifdef __x86_64__ + +#define MEGABYTES(x) ((x) * (1048576)) + +#define KEXEC_NOTE_HEAD_BYTES roundup(sizeof(Elf64_Nhdr), 4) + +#endif + +#define KEXEC_CORE_NOTE_DESC_BYTES roundup(sizeof(struct elf_prstatus), 4) + +#define KEXEC_NOTE_BYTES ((KEXEC_NOTE_HEAD_BYTES * 2) + \ + roundup(KEXEC_CORE_NOTE_NAME_BYTES, 4) + \ + KEXEC_CORE_NOTE_DESC_BYTES ) + +#define for_each_online_cpu(cpu) \ + for (cpu = 0; cpu < max_mask_cpu(); ++cpu) \ + if (is_online_cpu(cpu)) + +enum { + BITPERWORD = BITPERBYTE * sizeof(unsigned long) +}; + +struct sadump_diskset_info { + char *name_memory; + int fd_memory; + struct sadump_part_header *sph_memory; + unsigned long data_offset; +}; + +struct sadump_info { + struct sadump_part_header *sph_memory; + struct sadump_header *sh_memory; + struct sadump_disk_set_header *sdh_memory; + struct sadump_media_header *smh_memory; + struct sadump_diskset_info *diskset_info; + int num_disks; + unsigned long sub_hdr_offset; + uint32_t smram_cpu_state_size; + unsigned long data_offset; + unsigned long long *block_table; + unsigned long *__per_cpu_offset; + unsigned long __per_cpu_load; + FILE *file_elf_note; + char *cpu_online_mask_buf; + size_t cpumask_size; +/* Backup Region, First 640K of System RAM. 
*/ +#define KEXEC_BACKUP_SRC_END 0x0009ffff + unsigned long long backup_src_start; + unsigned long backup_src_size; + unsigned long long backup_offset; + int kdump_backed_up; + mdf_pfn_t max_mapnr; + struct dump_bitmap *ram_bitmap; +}; + +static char *guid_to_str(efi_guid_t *guid, char *buf, size_t buflen); +static struct tm *efi_time_t_to_tm(const efi_time_t *e); +static int verify_magic_number(uint32_t magicnum[DUMP_PART_HEADER_MAGICNUM_SIZE]); +static int read_device(void *buf, size_t bytes, ulong *offset); +static int read_device_diskset(struct sadump_diskset_info *sdi, void *buf, + size_t bytes, ulong *offset); +static int read_sadump_header(char *filename); +static int read_sadump_header_diskset(int diskid, struct sadump_diskset_info *sdi); +static unsigned long long pfn_to_block(mdf_pfn_t pfn); +static int lookup_diskset(unsigned long long whole_offset, int *diskid, + unsigned long long *disk_offset); +static int max_mask_cpu(void); +static int cpu_online_mask_init(void); +static int per_cpu_init(void); +static int get_data_from_elf_note_desc(const char *note_buf, uint32_t n_descsz, + char *name, uint32_t n_type, char **data); +static int alignfile(unsigned long *offset); +static int +write_elf_note_header(char *name, void *data, size_t descsz, uint32_t type, + unsigned long *offset, unsigned long *desc_offset); +static int is_online_cpu(int cpu); +static unsigned long legacy_per_cpu_ptr(unsigned long ptr, int cpu); +static unsigned long per_cpu_ptr(unsigned long ptr, int cpu); +static int get_prstatus_from_crash_notes(int cpu, char *prstatus_buf); +static int cpu_to_apicid(int cpu, int *apicid); +static int get_smram_cpu_state(int apicid, struct sadump_smram_cpu_state *smram); +static int copy_regs_from_prstatus(struct elf_prstatus *prstatus, + const char *prstatus_buf); +static int +copy_regs_from_smram_cpu_state(struct elf_prstatus *prstatus, + const struct sadump_smram_cpu_state *smram); +static void +debug_message_smram_cpu_state(int apicid, struct 
sadump_smram_cpu_state *s); +static void +debug_message_user_regs_struct(int cpu, struct elf_prstatus *prstatus); +static int get_registers(int cpu, struct elf_prstatus *prstatus); + +static struct sadump_info sadump_info = {}; +static struct sadump_info *si = &sadump_info; + +static inline int +sadump_is_on(char *bitmap, mdf_pfn_t i) +{ + return bitmap[i >> 3] & (1 << (7 - (i & 7))); +} + +static inline int +sadump_is_dumpable(struct dump_bitmap *bitmap, mdf_pfn_t pfn) +{ + off_t offset; + ssize_t rcode; + + if (pfn == 0 || bitmap->no_block != pfn/PFN_BUFBITMAP) { + offset = bitmap->offset + BUFSIZE_BITMAP*(pfn/PFN_BUFBITMAP); + lseek(bitmap->fd, offset, SEEK_SET); + rcode = read(bitmap->fd, bitmap->buf, BUFSIZE_BITMAP); + if (rcode != BUFSIZE_BITMAP) + ERRMSG("Can't read the bitmap(%s). %s\n", + bitmap->file_name, strerror(errno)); + if (pfn == 0) + bitmap->no_block = 0; + else + bitmap->no_block = pfn / PFN_BUFBITMAP; + } + return sadump_is_on(bitmap->buf, pfn % PFN_BUFBITMAP); +} + +static inline int +sadump_is_ram(mdf_pfn_t pfn) +{ + return sadump_is_dumpable(si->ram_bitmap, pfn); +} + +int +check_and_get_sadump_header_info(char *filename) +{ + int i; + + if (!read_sadump_header(filename)) + return FALSE; + + if (info->flag_sadump_diskset && info->flag_sadump == SADUMP_DISKSET) { + + si->diskset_info[0].fd_memory = info->fd_memory; + si->diskset_info[0].sph_memory = si->sph_memory; + si->diskset_info[0].data_offset = si->data_offset; + + for (i = 1; i < si->num_disks; ++i) { + struct sadump_diskset_info *sdi = + &si->diskset_info[i]; + + if ((sdi->fd_memory = + open(sdi->name_memory, O_RDONLY)) < 0) { + ERRMSG("Can't open the dump diskset " + "memory(%s). 
%s\n", sdi->name_memory, + strerror(errno)); + return FALSE; + } + + if (!read_sadump_header_diskset(i, sdi)) + return FALSE; + } + } + + return TRUE; +} + +static void +reverse_bit(char *buf, int len) +{ + int i; + unsigned char c; + + for (i = 0; i < len; i++) { + c = buf[i]; + c = ((c & 0x55) << 1) | ((c & 0xaa) >> 1); /* Swap 1bit */ + c = ((c & 0x33) << 2) | ((c & 0xcc) >> 2); /* Swap 2bit */ + c = (c << 4) | (c >> 4); /* Swap 4bit */ + buf[i] = c; + } +} + +int +sadump_copy_1st_bitmap_from_memory(void) +{ + struct sadump_header *sh = si->sh_memory; + char buf[si->sh_memory->block_size]; + off_t offset_page; + unsigned long bitmap_offset, bitmap_len; + mdf_pfn_t pfn, pfn_bitmap1; + extern mdf_pfn_t pfn_memhole; + + bitmap_offset = si->sub_hdr_offset + sh->block_size*sh->sub_hdr_size; + bitmap_len = sh->block_size * sh->bitmap_blocks; + + if (lseek(info->fd_memory, bitmap_offset, SEEK_SET) < 0) { + ERRMSG("Can't seek %s. %s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + if (lseek(info->bitmap1->fd, info->bitmap1->offset, SEEK_SET) < 0) { + ERRMSG("Can't seek the bitmap(%s). %s\n", + info->bitmap1->file_name, strerror(errno)); + return FALSE; + } + offset_page = 0; + while (offset_page < bitmap_len) { + if (read(info->fd_memory, buf, sizeof(buf)) != sizeof(buf)) { + ERRMSG("Can't read %s. %s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + /* + * sadump formats associate each bit in a bitmap with + * a physical page in reverse order with the + * kdump-compressed format. We need to change bit + * order to reuse bitmaps in sadump formats in the + * kdump-compressed format. + */ + reverse_bit(buf, sizeof(buf)); + if (write(info->bitmap1->fd, buf, sizeof(buf)) != sizeof(buf)) { + ERRMSG("Can't write the bitmap(%s). 
%s\n", + info->bitmap1->file_name, strerror(errno)); + return FALSE; + } + offset_page += sizeof(buf); + } + + pfn_bitmap1 = 0; + for (pfn = 0; pfn < info->max_mapnr; ++pfn) { + if (sadump_is_ram(pfn)) + pfn_bitmap1++; + } + pfn_memhole = info->max_mapnr - pfn_bitmap1; + + /* + * kdump uses the first 640kB on the 2nd kernel. But both + * bitmaps should reflect the 1st kernel memory situation. We + * modify bitmap accordingly. + */ + if (si->kdump_backed_up) { + unsigned long long paddr; + mdf_pfn_t pfn, backup_src_pfn; + + for (paddr = si->backup_src_start; + paddr < si->backup_src_start + si->backup_src_size; + paddr += info->page_size) { + + pfn = paddr_to_pfn(paddr); + backup_src_pfn = paddr_to_pfn(paddr + + si->backup_offset - + si->backup_src_start); + + if (is_dumpable(info->bitmap_memory, backup_src_pfn, NULL)) + set_bit_on_1st_bitmap(pfn, NULL); + else + clear_bit_on_1st_bitmap(pfn, NULL); + } + } + + return TRUE; +} + +int +sadump_generate_vmcoreinfo_from_vmlinux(size_t *vmcoreinfo_size) +{ + size_t size; + + if (!info->file_vmcoreinfo) + return FALSE; + + if ((SYMBOL(system_utsname) == NOT_FOUND_SYMBOL) && + (SYMBOL(init_uts_ns) == NOT_FOUND_SYMBOL)) { + ERRMSG("Can't get the symbol of system_utsname.\n"); + return FALSE; + } + + if (get_mem_type() == NOT_FOUND_MEMTYPE) { + ERRMSG("Can't find the memory type.\n"); + return FALSE; + } + + strncpy(info->release, info->system_utsname.release, + strlen(info->system_utsname.release) + 1); + + write_vmcoreinfo_data(); + + size = ftell(info->file_vmcoreinfo); + + *vmcoreinfo_size = size; + + return TRUE; +} + +int +sadump_generate_elf_note_from_dumpfile(void) +{ + size_t size_vmcoreinfo, size_pt_note; + int x_cpu; + unsigned long offset, offset_vmcoreinfo; + char *vmcoreinfo_buf = NULL; + int retval = FALSE; + + if (!per_cpu_init()) + return FALSE; + + if (!(info->file_vmcoreinfo = tmpfile())) { + ERRMSG("Can't create a temporary strings(%s).\n", + FILENAME_VMCOREINFO); + return FALSE; + } + if 
(!sadump_generate_vmcoreinfo_from_vmlinux(&size_vmcoreinfo)) { + ERRMSG("Can't generate vmcoreinfo data.\n"); + goto error; + } + if ((vmcoreinfo_buf = malloc(size_vmcoreinfo)) == NULL) { + ERRMSG("Can't allocate vmcoreinfo buffer. %s\n", + strerror(errno)); + goto cleanup; + } + rewind(info->file_vmcoreinfo); + if (fread(vmcoreinfo_buf, size_vmcoreinfo, 1, + info->file_vmcoreinfo) != 1) { + ERRMSG("Can't read vmcoreinfo temporary file. %s\n", + strerror(errno)); + goto cleanup; + } + + if (!(si->file_elf_note = tmpfile())) { + ERRMSG("Can't create a temporary elf_note file. %s\n", + strerror(errno)); + goto cleanup; + } + if (!cpu_online_mask_init()) + goto cleanup; + offset = 0; + for_each_online_cpu(x_cpu) { + struct elf_prstatus prstatus; + + memset(&prstatus, 0, sizeof(prstatus)); + + if (!get_registers(x_cpu, &prstatus)) + goto cleanup; + + if (!write_elf_note_header("CORE", &prstatus, sizeof(prstatus), + NT_PRSTATUS, &offset, NULL)) + goto cleanup; + + } + + if (!write_elf_note_header("VMCOREINFO", vmcoreinfo_buf, + size_vmcoreinfo, 0, &offset, + &offset_vmcoreinfo)) + goto cleanup; + + size_pt_note = ftell(si->file_elf_note); + set_pt_note(0, size_pt_note); + set_vmcoreinfo(offset_vmcoreinfo, size_vmcoreinfo); + + retval = TRUE; + +cleanup: + free(vmcoreinfo_buf); + if (info->file_vmcoreinfo) { + fclose(info->file_vmcoreinfo); + info->file_vmcoreinfo = NULL; + } +error: + return retval; +} + +static char * +guid_to_str(efi_guid_t *guid, char *buf, size_t buflen) +{ + snprintf(buf, buflen, + "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", + htonl(guid->data1), htons(guid->data2), htons(guid->data3), + guid->data4[0], guid->data4[1], guid->data4[2], + guid->data4[3], guid->data4[4], guid->data4[5], + guid->data4[6], guid->data4[7]); + + return buf; +} + +static struct tm * +efi_time_t_to_tm(const efi_time_t *e) +{ + static struct tm t; + time_t ti; + + memset(&t, 0, sizeof(t)); + + t.tm_sec = e->second; + t.tm_min = e->minute; + t.tm_hour = e->hour; + 
t.tm_mday = e->day; + t.tm_mon = e->month - 1; + t.tm_year = e->year - 1900; + + if (e->timezone != EFI_UNSPECIFIED_TIMEZONE) + t.tm_hour += e->timezone; + + else + DEBUG_MSG("sadump: timezone information is missing\n"); + + ti = mktime(&t); + if (ti == (time_t)-1) + return &t; + + return localtime_r(&ti, &t); +} + +static int +verify_magic_number(uint32_t magicnum[DUMP_PART_HEADER_MAGICNUM_SIZE]) +{ + int i; + + for (i = 1; i < DUMP_PART_HEADER_MAGICNUM_SIZE; ++i) + if (magicnum[i] != (magicnum[i - 1] + 7) * 11) + return FALSE; + + return TRUE; +} + +static int +read_device(void *buf, size_t bytes, ulong *offset) +{ + if (lseek(info->fd_memory, *offset, SEEK_SET) < 0) { + ERRMSG("Can't seek a file(%s). %s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + if (read(info->fd_memory, buf, bytes) != bytes) { + ERRMSG("Can't read a file(%s). %s\n", + info->name_memory, strerror(errno)); + return FALSE; + } + *offset += bytes; + return TRUE; +} + +static int +read_device_diskset(struct sadump_diskset_info *sdi, void *buf, + size_t bytes, unsigned long *offset) +{ + if (lseek(sdi->fd_memory, *offset, SEEK_SET) < 0) { + ERRMSG("Can't seek a file(%s). %s\n", + sdi->name_memory, strerror(errno)); + return FALSE; + } + if (read(sdi->fd_memory, buf, bytes) != bytes) { + ERRMSG("Can't read a file(%s). 
%s\n", + sdi->name_memory, strerror(errno)); + return FALSE; + } + *offset += bytes; + return TRUE; +} + +static int +read_sadump_header(char *filename) +{ + struct sadump_part_header *sph = NULL; + struct sadump_header *sh = NULL; + struct sadump_disk_set_header *sdh = NULL; + struct sadump_media_header *smh = NULL; + unsigned long offset = 0, sub_hdr_offset; + unsigned long block_size = SADUMP_DEFAULT_BLOCK_SIZE; + unsigned long bitmap_len, dumpable_bitmap_len; + enum sadump_format_type flag_sadump; + uint32_t smram_cpu_state_size = 0; + char guid[SADUMP_EFI_GUID_TEXT_REPR_LEN+1]; + + if ((si->sph_memory = malloc(SADUMP_DEFAULT_BLOCK_SIZE)) == NULL) { + ERRMSG("Can't allocate memory for partition header buffer: " + "%s\n", strerror(errno)); + return FALSE; + } + + if ((si->sh_memory = malloc(SADUMP_DEFAULT_BLOCK_SIZE)) == NULL) { + ERRMSG("Can't allocate memory for dump header buffer: " + "%s\n", strerror(errno)); + return FALSE; + } + + if ((si->sdh_memory = malloc(SADUMP_DEFAULT_BLOCK_SIZE)) == NULL) { + ERRMSG("Can't allocate memory for disk set header buffer: " + "%s\n", strerror(errno)); + return FALSE; + } + + if ((si->smh_memory = malloc(SADUMP_DEFAULT_BLOCK_SIZE)) == NULL) { + ERRMSG("Can't allocate memory for media header buffer: " + "%s\n", strerror(errno)); + return FALSE; + } + + sph = si->sph_memory; + sh = si->sh_memory; + sdh = si->sdh_memory; + smh = si->smh_memory; + +restart: + if (!read_device(sph, block_size, &offset)) + return ERROR; + + if (sph->signature1 == SADUMP_SIGNATURE1 && + sph->signature2 == SADUMP_SIGNATURE2) { + + if (sph->set_disk_set == 0) { + + flag_sadump = SADUMP_SINGLE_PARTITION; + + DEBUG_MSG("sadump: read dump device as single partition\n"); + + } else { + + flag_sadump = SADUMP_DISKSET; + + DEBUG_MSG("sadump: read dump device as diskset\n"); + + } + + } else { + + offset = 0; + + if (!read_device(smh, block_size, &offset)) + return ERROR; + + if (!read_device(sph, block_size, &offset)) + return ERROR; + + if 
(sph->signature1 != SADUMP_SIGNATURE1 || + sph->signature2 != SADUMP_SIGNATURE2) { + + DEBUG_MSG("sadump: does not have partition header\n"); + + flag_sadump = SADUMP_UNKNOWN; + + DEBUG_MSG("sadump: read dump device as unknown format\n"); + + goto out; + } + + flag_sadump = SADUMP_MEDIA_BACKUP; + + DEBUG_MSG("sadump: read dump device as media backup format\n"); + + } + + if (!verify_magic_number(sph->magicnum)) { + DEBUG_MSG("sadump: invalid magic number\n"); + return FALSE; + } + + if (flag_sadump == SADUMP_DISKSET) { + uint32_t header_blocks; + size_t header_size; + + if (sph->set_disk_set != 1) { + DEBUG_MSG("sadump: id of this disk is %d\n", + sph->set_disk_set); + return FALSE; + } + + if (!read_device(&header_blocks, sizeof(uint32_t), + &offset)) + return FALSE; + + offset -= sizeof(uint32_t); + header_size = header_blocks * block_size; + + if (header_size > block_size) { + sdh = realloc(sdh, header_size); + if (!sdh) { + ERRMSG("Can't allocate memory for disk " + "set memory\n"); + return FALSE; + } + } + + if (!read_device(sdh, header_size, &offset)) + return ERROR; + + DEBUG_MSG("sadump: the diskset consists of %u disks\n", + sdh->disk_num); + + } + + if (!read_device(sh, block_size, &offset)) + return FALSE; + + sub_hdr_offset = offset; + + if (strncmp(sh->signature, SADUMP_SIGNATURE, 8) != 0) { + DEBUG_MSG("sadump: does not have dump header\n"); + return FALSE; + } + + if (flag_sadump == SADUMP_MEDIA_BACKUP) { + + if (memcmp(&sph->sadump_id, &smh->sadump_id, + sizeof(efi_guid_t)) != 0) { + DEBUG_MSG("sadump: system ID mismatch\n"); + DEBUG_MSG(" partition header: %s\n", + guid_to_str(&sph->sadump_id, guid, + sizeof(guid))); + DEBUG_MSG(" media header: %s\n", + guid_to_str(&smh->sadump_id, guid, + sizeof(guid))); + return FALSE; + } + + if (memcmp(&sph->disk_set_id, &smh->disk_set_id, + sizeof(efi_guid_t)) != 0) { + DEBUG_MSG("sadump: disk set ID mismatch\n"); + DEBUG_MSG(" partition header: %s\n", + guid_to_str(&sph->disk_set_id, guid, + sizeof(guid))); 
+ DEBUG_MSG(" media header: %s\n", + guid_to_str(&smh->disk_set_id, guid, + sizeof(guid))); + return FALSE; + } + + if (memcmp(&sph->time_stamp, &smh->time_stamp, + sizeof(efi_time_t)) != 0) { + DEBUG_MSG("sadump: time stamp mismatch\n"); + DEBUG_MSG(" partition header: %s", + asctime(efi_time_t_to_tm(&sph->time_stamp))); + DEBUG_MSG(" media header: %s", + asctime(efi_time_t_to_tm(&smh->time_stamp))); + } + + if (smh->sequential_num != 1) { + DEBUG_MSG("sadump: first media file has sequential " + "number %d\n", smh->sequential_num); + return FALSE; + } + + } + + if (sh->block_size != block_size) { + block_size = sh->block_size; + offset = 0; + goto restart; + } + + if (sh->sub_hdr_size > 0) { + if (!read_device(&smram_cpu_state_size, sizeof(uint32_t), + &offset)) { + DEBUG_MSG("sadump: cannot read SMRAM CPU STATE size\n"); + return FALSE; + } + smram_cpu_state_size /= sh->nr_cpus; + + offset -= sizeof(uint32_t); + offset += sh->sub_hdr_size * block_size; + } + + switch (sh->header_version) { + case 0: + si->max_mapnr = (mdf_pfn_t)(uint64_t)sh->max_mapnr; + break; + default: + ERRMSG("sadump: unsupported header version: %u\n" + "sadump: assuming header version: 1\n", + sh->header_version); + case 1: + si->max_mapnr = (mdf_pfn_t)sh->max_mapnr_64; + break; + } + + if (!sh->bitmap_blocks) { + DEBUG_MSG("sadump: bitmap_blocks is zero\n"); + return FALSE; + } + + if (!sh->dumpable_bitmap_blocks) { + DEBUG_MSG("sadump: dumpable_bitmap_blocks is zero\n"); + return FALSE; + } + + bitmap_len = block_size * sh->bitmap_blocks; + dumpable_bitmap_len = block_size * sh->dumpable_bitmap_blocks; + + si->sub_hdr_offset = sub_hdr_offset; + si->smram_cpu_state_size = smram_cpu_state_size; + si->data_offset = offset + bitmap_len + dumpable_bitmap_len; + +out: + switch (flag_sadump) { + case SADUMP_SINGLE_PARTITION: + DEBUG_MSG("sadump: single partition configuration\n"); + break; + case SADUMP_DISKSET: + DEBUG_MSG("sadump: diskset configuration with %d disks\n", + sdh->disk_num); + 
break; + case SADUMP_MEDIA_BACKUP: + DEBUG_MSG("sadump: media backup file\n"); + break; + case SADUMP_UNKNOWN: + DEBUG_MSG("sadump: unknown format\n"); + break; + } + + info->flag_sadump = flag_sadump; + + return TRUE; +} + +static int +read_sadump_header_diskset(int diskid, struct sadump_diskset_info *sdi) +{ + struct sadump_part_header *sph = NULL; + unsigned long offset = 0; + char guid[SADUMP_EFI_GUID_TEXT_REPR_LEN+1]; + + if ((sph = malloc(si->sh_memory->block_size)) == NULL) { + ERRMSG("Can't allocate memory for partition header buffer. " + "%s\n", strerror(errno)); + goto error; + } + + if (!read_device_diskset(sdi, sph, si->sh_memory->block_size, + &offset)) + goto error; + + if (sph->signature1 != SADUMP_SIGNATURE1 || + sph->signature2 != SADUMP_SIGNATURE2) { + DEBUG_MSG("sadump: does not have partition header\n"); + goto error; + } + + if (memcmp(&si->sph_memory->sadump_id, &sph->sadump_id, + sizeof(efi_guid_t)) != 0) { + DEBUG_MSG("sadump: system ID mismatch\n"); + DEBUG_MSG(" partition header on disk #1: %s\n", + guid_to_str(&si->sph_memory->sadump_id, guid, + sizeof(guid))); + DEBUG_MSG(" partition header on disk #%d: %s\n", diskid, + guid_to_str(&sph->sadump_id, guid, sizeof(guid))); + goto error; + } + + if (memcmp(&si->sph_memory->disk_set_id, &sph->disk_set_id, + sizeof(efi_guid_t)) != 0) { + DEBUG_MSG("sadump: disk set ID mismatch\n"); + DEBUG_MSG(" partition header on disk #1: %s\n", + guid_to_str(&si->sph_memory->disk_set_id, guid, + sizeof(guid))); + DEBUG_MSG(" partition header on disk #%d: %s\n", diskid, + guid_to_str(&sph->disk_set_id, guid, sizeof(guid))); + goto error; + } + + if (memcmp(&si->sdh_memory->vol_info[diskid-1].id, &sph->vol_id, + sizeof(efi_guid_t)) != 0) { + DEBUG_MSG("sadump: volume ID mismatch\n"); + DEBUG_MSG(" disk set header on disk #1: %s\n", + guid_to_str(&si->sdh_memory->vol_info[diskid-1].id, + guid, sizeof(guid))); + DEBUG_MSG(" partition header on disk #%d: %s\n", + diskid+1, + guid_to_str(&sph->vol_id, guid, 
sizeof(guid))); + goto error; + } + + if (memcmp(&si->sph_memory->time_stamp, &sph->time_stamp, + sizeof(efi_time_t)) != 0) { + DEBUG_MSG("sadump time stamp mismatch\n"); + DEBUG_MSG(" partition header on disk #1: %s\n", + asctime(efi_time_t_to_tm + (&si->sph_memory->time_stamp))); + DEBUG_MSG(" partition header on disk #%d: %s\n", + diskid, asctime(efi_time_t_to_tm(&sph->time_stamp))); + } + + if (diskid+1 != sph->set_disk_set) { + DEBUG_MSG("sadump: wrong disk order; #%d expected but #%d given\n", + diskid+1, sph->set_disk_set); + goto error; + } + + sdi->sph_memory = sph; + sdi->data_offset = si->sh_memory->block_size; + + return TRUE; + +error: + free(sph); + + return FALSE; +} + +int +sadump_initialize_bitmap_memory(void) +{ + struct sadump_header *sh = si->sh_memory; + struct dump_bitmap *bmp; + unsigned long dumpable_bitmap_offset; + unsigned long long section, max_section; + mdf_pfn_t pfn; + unsigned long long *block_table; + + dumpable_bitmap_offset = + si->sub_hdr_offset + + sh->block_size * (sh->sub_hdr_size + sh->bitmap_blocks); + + bmp = malloc(sizeof(struct dump_bitmap)); + if (bmp == NULL) { + ERRMSG("Can't allocate memory for the memory-bitmap. %s\n", + strerror(errno)); + return FALSE; + } + + bmp->fd = info->fd_memory; + bmp->file_name = info->name_memory; + bmp->no_block = -1; + bmp->offset = dumpable_bitmap_offset; + + bmp->buf = malloc(BUFSIZE_BITMAP); + if (!bmp->buf) { + ERRMSG("Can't allocate memory for the memory-bitmap's buffer. %s\n", + strerror(errno)); + free(bmp); + return FALSE; + } + memset(bmp->buf, 0, BUFSIZE_BITMAP); + + max_section = divideup(si->max_mapnr, SADUMP_PF_SECTION_NUM); + + block_table = calloc(sizeof(unsigned long long), max_section); + if (block_table == NULL) { + ERRMSG("Can't allocate memory for the block_table. 
%s\n", + strerror(errno)); + free(bmp->buf); + free(bmp); + return FALSE; + } + + for (section = 0; section < max_section; ++section) { + if (section > 0) + block_table[section] = block_table[section-1]; + for (pfn = section * SADUMP_PF_SECTION_NUM; + pfn < (section + 1) * SADUMP_PF_SECTION_NUM; + ++pfn) + if (is_dumpable(bmp, pfn, NULL)) + block_table[section]++; + } + + info->bitmap_memory = bmp; + si->block_table = block_table; + + bmp = malloc(sizeof(struct dump_bitmap)); + if (bmp == NULL) { + ERRMSG("Can't allocate memory for the memory-bitmap. %s\n", + strerror(errno)); + return FALSE; + } + bmp->fd = info->fd_memory; + bmp->file_name = info->name_memory; + bmp->no_block = -1; + bmp->offset = si->sub_hdr_offset + sh->block_size * sh->sub_hdr_size; + + bmp->buf = malloc(BUFSIZE_BITMAP); + if (!bmp->buf) { + ERRMSG("Can't allocate memory for the memory-bitmap's buffer. %s\n", + strerror(errno)); + free(bmp); + return FALSE; + } + memset(bmp->buf, 0, BUFSIZE_BITMAP); + + si->ram_bitmap = bmp; + + /* + * Perform explicitly zero filtering. Without this processing + * crash utility faces different behaviors on reading zero + * pages that are filtered out on the kdump-compressed format + * originating from kdump ELF and from sadump formats: the + * former succeeds in reading zero pages but the latter fails. + */ + for (pfn = 0; pfn < si->max_mapnr; pfn++) { + if (sadump_is_ram(pfn) && + !sadump_is_dumpable(info->bitmap_memory, pfn)) { + info->dump_level |= DL_EXCLUDE_ZERO; + break; + } + } + + return TRUE; +} + +static int +max_mask_cpu(void) +{ + return BITPERBYTE * si->cpumask_size; +} + +static int +cpu_online_mask_init(void) +{ + ulong cpu_online_mask_addr; + + if (si->cpu_online_mask_buf && si->cpumask_size) + return TRUE; + + if (SYMBOL(cpu_online_mask) == NOT_FOUND_SYMBOL || + (SIZE(cpumask) == NOT_FOUND_STRUCTURE && + SIZE(cpumask_t) == NOT_FOUND_STRUCTURE)) + return FALSE; + + si->cpumask_size = SIZE(cpumask) == NOT_FOUND_STRUCTURE + ? 
SIZE(cpumask_t) + : SIZE(cpumask); + + if (!(si->cpu_online_mask_buf = calloc(1, si->cpumask_size))) { + ERRMSG("Can't allocate cpu_online_mask buffer. %s\n", + strerror(errno)); + return FALSE; + } + + if ((SIZE(cpumask) == NOT_FOUND_STRUCTURE) || + (SYMBOL(__cpu_online_mask) != NOT_FOUND_SYMBOL)) + cpu_online_mask_addr = SYMBOL(cpu_online_mask); + + else { + if (!readmem(VADDR, SYMBOL(cpu_online_mask), + &cpu_online_mask_addr, sizeof(unsigned long))) { + ERRMSG("Can't read cpu_online_mask pointer.\n"); + return FALSE; + } + + } + + if (!readmem(VADDR, cpu_online_mask_addr, si->cpu_online_mask_buf, + si->cpumask_size)) { + ERRMSG("Can't read cpu_online_mask memory.\n"); + return FALSE; + } + + return TRUE; +} + +int +sadump_num_online_cpus(void) +{ + int cpu, count = 0; + + if (!cpu_online_mask_init()) + return FALSE; + + DEBUG_MSG("sadump: online cpus:"); + + for_each_online_cpu(cpu) { + count++; + DEBUG_MSG(" %d", cpu); + } + + DEBUG_MSG("\nsadump: nr_cpus: %d\n", count); + + return count; +} + +int +sadump_set_timestamp(struct timeval *ts) +{ + static struct tm t; + efi_time_t *e = &si->sph_memory->time_stamp; + time_t ti; + + memset(&t, 0, sizeof(t)); + + t.tm_sec = e->second; + t.tm_min = e->minute; + t.tm_hour = e->hour; + t.tm_mday = e->day; + t.tm_mon = e->month - 1; + t.tm_year = e->year - 1900; + + if (e->timezone != EFI_UNSPECIFIED_TIMEZONE) + t.tm_hour += e->timezone; + + else + DEBUG_MSG("sadump: timezone information is missing\n"); + + ti = mktime(&t); + if (ti == (time_t)-1) + return FALSE; + + ts->tv_sec = ti; + ts->tv_usec = 0; + + return TRUE; +} + +mdf_pfn_t +sadump_get_max_mapnr(void) +{ + return si->max_mapnr; +} + +#ifdef __x86_64__ + +/* + * Get address of vector0 interrupt handler (Devide Error) form Interrupt + * Descriptor Table. 
+ */ +static unsigned long +get_vec0_addr(ulong idtr) +{ + struct gate_struct64 { + uint16_t offset_low; + uint16_t segment; + uint32_t ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; + uint16_t offset_middle; + uint32_t offset_high; + uint32_t zero1; + } __attribute__((packed)) gate; + + readmem(PADDR, idtr, &gate, sizeof(gate)); + + return ((ulong)gate.offset_high << 32) + + ((ulong)gate.offset_middle << 16) + + gate.offset_low; +} + +/* + * Parse a string of [size[KMG]@]offset[KMG] + * Import from Linux kernel(lib/cmdline.c) + */ +static ulong memparse(char *ptr, char **retptr) +{ + char *endptr; + + unsigned long long ret = strtoull(ptr, &endptr, 0); + + switch (*endptr) { + case 'E': + case 'e': + ret <<= 10; + case 'P': + case 'p': + ret <<= 10; + case 'T': + case 't': + ret <<= 10; + case 'G': + case 'g': + ret <<= 10; + case 'M': + case 'm': + ret <<= 10; + case 'K': + case 'k': + ret <<= 10; + endptr++; + default: + break; + } + + if (retptr) + *retptr = endptr; + + return ret; +} + +/* + * Find "elfcorehdr=" in the boot parameter of kernel and return the address + * of elfcorehdr. 
+ */ +static ulong +get_elfcorehdr(ulong cr3) +{ + char cmdline[BUFSIZE], *ptr; + ulong cmdline_vaddr; + ulong cmdline_paddr; + ulong buf_vaddr, buf_paddr; + char *end; + ulong elfcorehdr_addr = 0, elfcorehdr_size = 0; + + if (SYMBOL(saved_command_line) == NOT_FOUND_SYMBOL) { + ERRMSG("Can't get the symbol of saved_command_line.\n"); + return 0; + } + cmdline_vaddr = SYMBOL(saved_command_line); + if ((cmdline_paddr = vtop4_x86_64_pagetable(cmdline_vaddr, cr3)) == NOT_PADDR) + return 0; + + DEBUG_MSG("sadump: cmdline vaddr: %lx\n", cmdline_vaddr); + DEBUG_MSG("sadump: cmdline paddr: %lx\n", cmdline_paddr); + + if (!readmem(PADDR, cmdline_paddr, &buf_vaddr, sizeof(ulong))) + return 0; + + if ((buf_paddr = vtop4_x86_64_pagetable(buf_vaddr, cr3)) == NOT_PADDR) + return 0; + + DEBUG_MSG("sadump: cmdline buf vaddr: %lx\n", buf_vaddr); + DEBUG_MSG("sadump: cmdline buf paddr: %lx\n", buf_paddr); + + memset(cmdline, 0, BUFSIZE); + if (!readmem(PADDR, buf_paddr, cmdline, BUFSIZE)) + return 0; + + ptr = strstr(cmdline, "elfcorehdr="); + if (!ptr) + return 0; + + DEBUG_MSG("sadump: 2nd kernel detected.\n"); + + ptr += strlen("elfcorehdr="); + elfcorehdr_addr = memparse(ptr, &end); + if (*end == '@') { + elfcorehdr_size = elfcorehdr_addr; + elfcorehdr_addr = memparse(end + 1, &end); + } + + DEBUG_MSG("sadump: elfcorehdr_addr: %lx\n", elfcorehdr_addr); + DEBUG_MSG("sadump: elfcorehdr_size: %lx\n", elfcorehdr_size); + + return elfcorehdr_addr; +} + +/* + * Get vmcoreinfo from elfcorehdr. 
+ * Some codes are imported from Linux kernel(fs/proc/vmcore.c) + */ +static int +get_vmcoreinfo_in_kdump_kernel(ulong elfcorehdr, ulong *addr, int *len) +{ + unsigned char e_ident[EI_NIDENT]; + Elf64_Ehdr ehdr; + Elf64_Phdr phdr; + Elf64_Nhdr nhdr; + ulong ptr; + ulong nhdr_offset = 0; + int i; + + if (!readmem(PADDR, elfcorehdr, e_ident, EI_NIDENT)) + return FALSE; + + if (e_ident[EI_CLASS] != ELFCLASS64) { + ERRMSG("Only ELFCLASS64 is supportd\n"); + return FALSE; + } + + if (!readmem(PADDR, elfcorehdr, &ehdr, sizeof(ehdr))) + return FALSE; + + /* Sanity Check */ + if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 || + (ehdr.e_type != ET_CORE) || + ehdr.e_ident[EI_CLASS] != ELFCLASS64 || + ehdr.e_ident[EI_VERSION] != EV_CURRENT || + ehdr.e_version != EV_CURRENT || + ehdr.e_ehsize != sizeof(Elf64_Ehdr) || + ehdr.e_phentsize != sizeof(Elf64_Phdr) || + ehdr.e_phnum == 0) { + ERRMSG("Invalid elf header\n"); + return FALSE; + } + + ptr = elfcorehdr + ehdr.e_phoff; + for (i = 0; i < ehdr.e_phnum; i++) { + ulong offset; + char name[16]; + + if (!readmem(PADDR, ptr, &phdr, sizeof(phdr))) + return FALSE; + + ptr += sizeof(phdr); + if (phdr.p_type != PT_NOTE) + continue; + + offset = phdr.p_offset; + if (!readmem(PADDR, offset, &nhdr, sizeof(nhdr))) + return FALSE; + + offset += divideup(sizeof(Elf64_Nhdr), sizeof(Elf64_Word))* + sizeof(Elf64_Word); + memset(name, 0, sizeof(name)); + if (!readmem(PADDR, offset, name, sizeof(name))) + return FALSE; + + if(!strcmp(name, "VMCOREINFO")) { + nhdr_offset = offset; + break; + } + } + + if (!nhdr_offset) + return FALSE; + + *addr = nhdr_offset + + divideup(nhdr.n_namesz, sizeof(Elf64_Word))* + sizeof(Elf64_Word); + *len = nhdr.n_descsz; + + DEBUG_MSG("sadump: vmcoreinfo addr: %lx\n", *addr); + DEBUG_MSG("sadump: vmcoreinfo len: %d\n", *len); + + return TRUE; +} + +/* + * Check if current kaslr_offset/phys_base is for 1st kernel or 2nd kernel. + * If we are in 2nd kernel, get kaslr_offset/phys_base from vmcoreinfo. + * + * 1. 
Get command line and try to retrieve "elfcorehdr=" boot parameter + * 2. If "elfcorehdr=" is not found in command line, we are in 1st kernel. + * There is nothing to do. + * 3. If "elfcorehdr=" is found, we are in 2nd kernel. Find vmcoreinfo + * using "elfcorehdr=" and retrieve kaslr_offset/phys_base from vmcoreinfo. + */ +int +get_kaslr_offset_from_vmcoreinfo(ulong cr3, ulong *kaslr_offset, + ulong *phys_base) +{ + ulong elfcorehdr_addr = 0; + ulong vmcoreinfo_addr; + int vmcoreinfo_len; + char *buf, *pos; + int ret = FALSE; + + elfcorehdr_addr = get_elfcorehdr(cr3); + if (!elfcorehdr_addr) + return FALSE; + + if (!get_vmcoreinfo_in_kdump_kernel(elfcorehdr_addr, &vmcoreinfo_addr, + &vmcoreinfo_len)) + return FALSE; + + if (!vmcoreinfo_len) + return FALSE; + + DEBUG_MSG("sadump: Find vmcoreinfo in kdump memory\n"); + + if (!(buf = malloc(vmcoreinfo_len))) { + ERRMSG("Can't allocate vmcoreinfo buffer.\n"); + return FALSE; + } + + if (!readmem(PADDR, vmcoreinfo_addr, buf, vmcoreinfo_len)) + goto finish; + + pos = strstr(buf, STR_NUMBER("phys_base")); + if (!pos) + goto finish; + *phys_base = strtoull(pos + strlen(STR_NUMBER("phys_base")), NULL, 0); + + pos = strstr(buf, STR_KERNELOFFSET); + if (!pos) + goto finish; + *kaslr_offset = strtoull(pos + strlen(STR_KERNELOFFSET), NULL, 16); + ret = TRUE; + +finish: + free(buf); + return ret; +} + +/* + * Calculate kaslr_offset and phys_base + * + * kaslr_offset: + * The difference between original address in vmlinux and actual address + * placed randomly by kaslr feature. To be more accurate, + * kaslr_offset = actual address - original address + * + * phys_base: + * Physical address where the kerenel is placed. In other words, it's a + * physical address of __START_KERNEL_map. This is also decided randomly by + * kaslr. + * + * kaslr offset and phys_base are calculated as follows: + * + * kaslr_offset: + * 1) Get IDTR and CR3 value from the dump header. 
 + * 2) Get a virtual address of IDT from IDTR value
 + * --- (A)
 + * 3) Translate (A) to physical address using CR3, which points a top of
 + * page table.
 + * --- (B)
 + * 4) Get an address of vector0 (Divide Error) interrupt handler from
 + * IDT, which is pointed to by (B).
 + * --- (C)
 + * 5) Get an address of symbol "divide_error" from vmlinux
 + * --- (D)
 + *
 + * Now we have two addresses:
 + * (C)-> Actual address of "divide_error"
 + * (D)-> Original address of "divide_error" in the vmlinux
 + *
 + * kaslr_offset can be calculated by the difference between these two
 + * values.
 + *
 + * phys_base:
 + * 1) Get IDT virtual address from vmlinux
 + * --- (E)
 + *
 + * So phys_base can be calculated using relationship of directly mapped
 + * address.
 + *
 + * phys_base =
 + * Physical address(B) -
 + * (Virtual address(E) + kaslr_offset - __START_KERNEL_map)
 + *
 + * Note that the address (A) cannot be used instead of (E) because (A) is
 + * not direct map address, it's a fixed map address.
 + *
 + * This solution works in almost every case, but does not work in the
 + * following cases.
 + *
 + * 1) If the dump is captured on early stage of kernel boot, IDTR points
 + * early IDT table(early_idts) instead of normal IDT(idt_table).
 + * 2) If the dump is captured while kdump is working, IDTR points
 + * IDT table of 2nd kernel, not 1st kernel.
 + *
 + * Current implementation does not support the case 1), need
 + * enhancement in the future. For the case 2), get kaslr_offset and
 + * phys_base as follows.
 + *
 + * 1) Get kaslr_offset and phys_base using the above solution.
 + * 2) Get kernel boot parameter from "saved_command_line"
 + * 3) If "elfcorehdr=" is not included in boot parameter, we are in the
 + * first kernel, nothing to do any more.
 + * 4) If "elfcorehdr=" is included in boot parameter, we are in the 2nd
 + * kernel. Retrieve vmcoreinfo from address of "elfcorehdr=" and
 + * get kaslr_offset and phys_base from vmcoreinfo. 
+ */ +#define PTI_USER_PGTABLE_BIT (info->page_shift) +#define PTI_USER_PGTABLE_MASK (1 << PTI_USER_PGTABLE_BIT) +#define CR3_PCID_MASK 0xFFFull +int +calc_kaslr_offset(void) +{ + struct sadump_header *sh = si->sh_memory; + uint64_t idtr = 0, cr3 = 0, idtr_paddr; + struct sadump_smram_cpu_state smram, zero; + int apicid; + unsigned long divide_error_vmcore, divide_error_vmlinux; + unsigned long kaslr_offset, phys_base; + unsigned long kaslr_offset_kdump, phys_base_kdump; + + memset(&zero, 0, sizeof(zero)); + for (apicid = 0; apicid < sh->nr_cpus; ++apicid) { + if (!get_smram_cpu_state(apicid, &smram)) { + ERRMSG("get_smram_cpu_state error\n"); + return FALSE; + } + + if (memcmp(&smram, &zero, sizeof(smram)) != 0) + break; + } + if (apicid >= sh->nr_cpus) { + ERRMSG("Can't get smram state\n"); + return FALSE; + } + + idtr = ((uint64_t)smram.IdtUpper)<<32 | (uint64_t)smram.IdtLower; + if ((SYMBOL(pti_init) != NOT_FOUND_SYMBOL) || + (SYMBOL(kaiser_init) != NOT_FOUND_SYMBOL)) + cr3 = smram.Cr3 & ~(CR3_PCID_MASK|PTI_USER_PGTABLE_MASK); + else + cr3 = smram.Cr3 & ~CR3_PCID_MASK; + + /* Convert virtual address of IDT table to physical address */ + if ((idtr_paddr = vtop4_x86_64_pagetable(idtr, cr3)) == NOT_PADDR) + return FALSE; + + /* Now we can calculate kaslr_offset and phys_base */ + divide_error_vmlinux = SYMBOL(divide_error); + divide_error_vmcore = get_vec0_addr(idtr_paddr); + kaslr_offset = divide_error_vmcore - divide_error_vmlinux; + phys_base = idtr_paddr - + (SYMBOL(idt_table) + kaslr_offset - __START_KERNEL_map); + + info->kaslr_offset = kaslr_offset; + info->phys_base = phys_base; + + DEBUG_MSG("sadump: idtr=%" PRIx64 "\n", idtr); + DEBUG_MSG("sadump: cr3=%" PRIx64 "\n", cr3); + DEBUG_MSG("sadump: idtr(phys)=%" PRIx64 "\n", idtr_paddr); + DEBUG_MSG("sadump: devide_error(vmlinux)=%lx\n", + divide_error_vmlinux); + DEBUG_MSG("sadump: devide_error(vmcore)=%lx\n", + divide_error_vmcore); + + /* Reload symbol */ + if (!get_symbol_info()) + return FALSE; + + /* + 
* Check if current kaslr_offset/phys_base is for 1st kernel or 2nd + * kernel. If we are in 2nd kernel, get kaslr_offset/phys_base + * from vmcoreinfo + */ + if (get_kaslr_offset_from_vmcoreinfo(cr3, &kaslr_offset_kdump, + &phys_base_kdump)) { + info->kaslr_offset = kaslr_offset_kdump; + info->phys_base = phys_base_kdump; + + /* Reload symbol */ + if (!get_symbol_info()) + return FALSE; + } + + DEBUG_MSG("sadump: kaslr_offset=%lx\n", info->kaslr_offset); + DEBUG_MSG("sadump: phys_base=%lx\n", info->phys_base); + + return TRUE; +} + +int +sadump_virt_phys_base(void) +{ + char buf[BUFSIZE]; + unsigned long phys, linux_banner_phys; + + if (SYMBOL(linux_banner) == NOT_FOUND_SYMBOL) { + DEBUG_MSG("sadump: symbol linux_banner is not found\n"); + goto failed; + } + + linux_banner_phys = SYMBOL(linux_banner) - __START_KERNEL_map; + + if (readmem(PADDR, linux_banner_phys + info->phys_base, buf, + strlen("Linux version")) && STRNEQ(buf, "Linux version")) + return TRUE; + + for (phys = (-MEGABYTES(16)); phys != MEGABYTES(16+1); + phys += MEGABYTES(1)) { + if (readmem(PADDR, linux_banner_phys + phys, buf, + strlen("Linux version")) && + STRNEQ(buf, "Linux version")) { + DEBUG_MSG("sadump: phys_base: %lx %s\n", phys, + info->phys_base != phys ? 
"override" : ""); + info->phys_base = phys; + return TRUE; + } + } + +failed: + if (calc_kaslr_offset()) + return TRUE; + + info->phys_base = 0; + + DEBUG_MSG("sadump: failed to calculate phys_base; default to 0\n"); + + return FALSE; +} + +#endif /* __x86_64__ */ + +int +readpage_sadump(unsigned long long paddr, void *bufptr) +{ + mdf_pfn_t pfn; + unsigned long long block, whole_offset, perdisk_offset; + int fd_memory; + + if (si->kdump_backed_up && + paddr >= si->backup_src_start && + paddr < si->backup_src_start + si->backup_src_size) + paddr += si->backup_offset - si->backup_src_start; + + pfn = paddr_to_pfn(paddr); + + if (pfn >= si->max_mapnr) + return FALSE; + + if (!sadump_is_ram(pfn)) { + ERRMSG("pfn(%llx) is not ram.\n", pfn); + return FALSE; + } + + if (!sadump_is_dumpable(info->bitmap_memory, pfn)) { + memset(bufptr, 0, info->page_size); + return TRUE; + } + + block = pfn_to_block(pfn); + whole_offset = block * si->sh_memory->block_size; + + if (info->flag_sadump == SADUMP_DISKSET) { + int diskid; + + if (!lookup_diskset(whole_offset, &diskid, &perdisk_offset)) + return FALSE; + + fd_memory = si->diskset_info[diskid].fd_memory; + perdisk_offset += si->diskset_info[diskid].data_offset; + + } else { + fd_memory = info->fd_memory; + perdisk_offset = whole_offset + si->data_offset; + + } + + if (lseek(fd_memory, perdisk_offset, SEEK_SET) < 0) + return FALSE; + + if (read(fd_memory, bufptr, info->page_size) != info->page_size) + return FALSE; + + return TRUE; +} + +int +sadump_check_debug_info(void) +{ + if (SYMBOL(linux_banner) == NOT_FOUND_SYMBOL) + return FALSE; + if (SYMBOL(bios_cpu_apicid) == NOT_FOUND_SYMBOL && + SYMBOL(x86_bios_cpu_apicid) == NOT_FOUND_SYMBOL) + return FALSE; + if (SYMBOL(x86_bios_cpu_apicid) != NOT_FOUND_SYMBOL && + (SYMBOL(x86_bios_cpu_apicid_early_ptr) == NOT_FOUND_SYMBOL || + SYMBOL(x86_bios_cpu_apicid_early_map) == NOT_FOUND_SYMBOL)) + return FALSE; + if (SYMBOL(crash_notes) == NOT_FOUND_SYMBOL) + return FALSE; + if 
(SIZE(percpu_data) == NOT_FOUND_STRUCTURE && + SYMBOL(__per_cpu_load) == NOT_FOUND_SYMBOL) + return FALSE; + if (SYMBOL(__per_cpu_load) != NOT_FOUND_SYMBOL && + (SYMBOL(__per_cpu_offset) == NOT_FOUND_SYMBOL && + ARRAY_LENGTH(__per_cpu_offset) == NOT_FOUND_STRUCTURE)) + return FALSE; + if (SIZE(elf_prstatus) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(elf_prstatus.pr_reg) == NOT_FOUND_STRUCTURE) + return FALSE; +#ifdef __x86__ + if (OFFSET(user_regs_struct.bx) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.cx) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.dx) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.si) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.di) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.bp) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.ax) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.ds) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.es) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.fs) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.gs) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.orig_ax) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.ip) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.cs) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.flags) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.sp) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.ss) == NOT_FOUND_STRUCTURE) + return FALSE; +#elif defined(__x86_64__) + if (OFFSET(user_regs_struct.r15) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.r14) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.r13) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.r12) == 
NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.bp) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.bx) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.r11) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.r10) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.r9) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.r8) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.ax) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.cx) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.dx) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.si) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.di) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.orig_ax) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.ip) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.cs) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.flags) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.sp) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.ss) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.fs_base) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.gs_base) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.ds) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.es) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.fs) == NOT_FOUND_STRUCTURE) + return FALSE; + if (OFFSET(user_regs_struct.gs) == NOT_FOUND_STRUCTURE) + return FALSE; +#endif /* __x86_64__ */ + return TRUE; +} + +static unsigned long long +pfn_to_block(mdf_pfn_t pfn) +{ + unsigned long long block, section, p; + + section = pfn / SADUMP_PF_SECTION_NUM; + + if (section) + block = si->block_table[section - 
1]; + else + block = 0; + + for (p = section * SADUMP_PF_SECTION_NUM; p < pfn; ++p) + if (sadump_is_dumpable(info->bitmap_memory, p)) + block++; + + return block; +} + +static int +lookup_diskset(unsigned long long whole_offset, int *diskid, + unsigned long long *disk_offset) +{ + unsigned long long offset = whole_offset; + int i; + + for (i = 0; i < si->num_disks; ++i) { + struct sadump_diskset_info *sdi = &si->diskset_info[i]; + unsigned long long used_device_i, data_offset_i, ram_size; + + used_device_i = sdi->sph_memory->used_device; + data_offset_i = sdi->data_offset; + + ram_size = used_device_i - data_offset_i; + + if (offset < ram_size) + break; + offset -= ram_size; + } + + if (i == si->num_disks) + return FALSE; + + *diskid = i; + *disk_offset = offset; + + return TRUE; +} + +static int +per_cpu_init(void) +{ + size_t __per_cpu_offset_size; + int i; + + if (SIZE(percpu_data) != NOT_FOUND_STRUCTURE) + return TRUE; + + __per_cpu_offset_size = + ARRAY_LENGTH(__per_cpu_offset) * sizeof(unsigned long); + + if (!(si->__per_cpu_offset = malloc(__per_cpu_offset_size))) { + ERRMSG("Can't allocate __per_cpu_offset buffer.\n"); + return FALSE; + } + + if (!readmem(VADDR, SYMBOL(__per_cpu_offset), si->__per_cpu_offset, + __per_cpu_offset_size)) { + ERRMSG("Can't read __per_cpu_offset memory.\n"); + return FALSE; + } + + if (!readmem(VADDR, SYMBOL(__per_cpu_load), &si->__per_cpu_load, + sizeof(unsigned long))) { + ERRMSG("Can't read __per_cpu_load memory.\n"); + return FALSE; + } + + DEBUG_MSG("sadump: __per_cpu_load: %#lx\n", si->__per_cpu_load); + DEBUG_MSG("sadump: __per_cpu_offset: LENGTH: %ld\n", + ARRAY_LENGTH(__per_cpu_offset)); + + for (i = 0; i < ARRAY_LENGTH(__per_cpu_offset); ++i) { + DEBUG_MSG("sadump: __per_cpu_offset[%d]: %#lx\n", i, + si->__per_cpu_offset[i]); + } + + return TRUE; +} + +static int +get_data_from_elf_note_desc(const char *note_buf, uint32_t n_descsz, + char *name, uint32_t n_type, char **data) +{ + Elf32_Nhdr *note32; + char *note_name; 
+ + note32 = (Elf32_Nhdr *)note_buf; + note_name = (char *)(note32 + 1); + + if (note32->n_type != n_type || + note32->n_namesz != strlen(name) + 1 || + note32->n_descsz != n_descsz || + strncmp(note_name, name, note32->n_namesz)) + return FALSE; + + *data = (char *)note_buf + + roundup(sizeof(Elf32_Nhdr) + note32->n_namesz, 4); + + return TRUE; +} + +static int +alignfile(unsigned long *offset) +{ + char nullbyte = '\0'; + unsigned int len; + + len = roundup(*offset, 4) - *offset; + if (fwrite(&nullbyte, 1, len, si->file_elf_note) != len) { + ERRMSG("Can't write elf_note file. %s\n", strerror(errno)); + return FALSE; + } + *offset += len; + return TRUE; +} + +static int +write_elf_note_header(char *name, void *data, size_t descsz, uint32_t type, + unsigned long *offset, unsigned long *desc_offset) +{ + Elf32_Nhdr nhdr; + + nhdr.n_namesz = strlen(name) + 1; + nhdr.n_descsz = descsz; + nhdr.n_type = type; + + if (fwrite(&nhdr, sizeof(nhdr), 1, si->file_elf_note) != 1) { + ERRMSG("Can't write elf_note file. %s\n", strerror(errno)); + return FALSE; + } + *offset += sizeof(nhdr); + + if (fwrite(name, nhdr.n_namesz, 1, si->file_elf_note) != 1) { + ERRMSG("Can't write elf_note file. %s\n", strerror(errno)); + return FALSE; + } + *offset += nhdr.n_namesz; + if (!alignfile(offset)) + return FALSE; + + if (desc_offset) + *desc_offset = *offset; + + if (fwrite(data, nhdr.n_descsz, 1, si->file_elf_note) != 1) { + ERRMSG("Can't write elf_note file. %s\n", strerror(errno)); + return FALSE; + } + *offset += nhdr.n_descsz; + if (!alignfile(offset)) + return FALSE; + + return TRUE; +} + +static int +is_online_cpu(int cpu) +{ + unsigned long mask; + + if (cpu < 0 || cpu >= max_mask_cpu()) + return FALSE; + + mask = ULONG(si->cpu_online_mask_buf + + (cpu / BITPERWORD) * sizeof(unsigned long)); + + return (mask & (1UL << (cpu % BITPERWORD))) ? 
TRUE : FALSE; +} + +static unsigned long +legacy_per_cpu_ptr(unsigned long ptr, int cpu) +{ + unsigned long addr; + + if (!is_online_cpu(cpu)) + return 0UL; + + if (!readmem(VADDR, ~ptr + cpu*sizeof(unsigned long), &addr, + sizeof(addr))) + return 0UL; + + return addr; +} + +static unsigned long +per_cpu_ptr(unsigned long ptr, int cpu) +{ + if (!is_online_cpu(cpu)) + return 0UL; + + if (si->__per_cpu_offset[cpu] == si->__per_cpu_load) + return 0UL; + + return ptr + si->__per_cpu_offset[cpu]; +} + +static int +get_prstatus_from_crash_notes(int cpu, char *prstatus_buf) +{ + unsigned long crash_notes_vaddr, percpu_addr; + char note_buf[KEXEC_NOTE_BYTES], zero_buf[KEXEC_NOTE_BYTES]; + char *prstatus_ptr; + + if (!is_online_cpu(cpu)) + return FALSE; + + if (SYMBOL(crash_notes) == NOT_FOUND_SYMBOL) + return FALSE; + + if (!readmem(VADDR, SYMBOL(crash_notes), &crash_notes_vaddr, + sizeof(crash_notes_vaddr))) + return FALSE; + + if (!crash_notes_vaddr) { + DEBUG_MSG("sadump: crash_notes %d is NULL\n", cpu); + return FALSE; + } + + memset(zero_buf, 0, KEXEC_NOTE_BYTES); + + percpu_addr = SIZE(percpu_data) != NOT_FOUND_STRUCTURE + ? 
legacy_per_cpu_ptr(crash_notes_vaddr, cpu) + : per_cpu_ptr(crash_notes_vaddr, cpu); + + if (!readmem(VADDR, percpu_addr, note_buf, KEXEC_NOTE_BYTES)) + return FALSE; + + if (memcmp(note_buf, zero_buf, KEXEC_NOTE_BYTES) == 0) + return FALSE; + + if (!get_data_from_elf_note_desc(note_buf, SIZE(elf_prstatus), "CORE", + NT_PRSTATUS, (void *)&prstatus_ptr)) + return FALSE; + + memcpy(prstatus_buf, prstatus_ptr, SIZE(elf_prstatus)); + + return TRUE; +} + +static int +cpu_to_apicid(int cpu, int *apicid) +{ + if (SYMBOL(bios_cpu_apicid) != NOT_FOUND_SYMBOL) { + uint8_t apicid_u8; + + if (!readmem(VADDR, SYMBOL(bios_cpu_apicid)+cpu*sizeof(uint8_t), + &apicid_u8, sizeof(uint8_t))) + return FALSE; + + *apicid = (int)apicid_u8; + + DEBUG_MSG("sadump: apicid %u for cpu %d from " + "bios_cpu_apicid\n", apicid_u8, cpu); + + } else if (SYMBOL(x86_bios_cpu_apicid) != NOT_FOUND_SYMBOL) { + uint16_t apicid_u16; + unsigned long early_ptr, apicid_addr; + + if (!readmem(VADDR, SYMBOL(x86_bios_cpu_apicid_early_ptr), + &early_ptr, sizeof(early_ptr))) + return FALSE; + /* + * Note: SYMBOL(name) value is adjusted by info->kaslr_offset, + * but per_cpu symbol does not need to be adjusted becasue it + * is not affected by kaslr. + */ + apicid_addr = early_ptr + ? 
SYMBOL(x86_bios_cpu_apicid_early_map)+cpu*sizeof(uint16_t) + : per_cpu_ptr(SYMBOL(x86_bios_cpu_apicid) - info->kaslr_offset, cpu); + + if (!readmem(VADDR, apicid_addr, &apicid_u16, sizeof(uint16_t))) + return FALSE; + + *apicid = (int)apicid_u16; + + DEBUG_MSG("sadump: apicid %u for cpu %d from " + "x86_bios_cpu_apicid\n", apicid_u16, cpu); + + } else { + + ERRMSG("sadump: no symbols for access to acpidid\n"); + + return FALSE; + } + + return TRUE; +} + +static int +get_smram_cpu_state(int apicid, struct sadump_smram_cpu_state *smram) +{ + unsigned long offset; + + if (!si->sub_hdr_offset || !si->smram_cpu_state_size || + apicid >= si->sh_memory->nr_cpus) + return FALSE; + + offset = si->sub_hdr_offset + sizeof(uint32_t) + + si->sh_memory->nr_cpus * sizeof(struct sadump_apic_state); + + if (lseek(info->fd_memory, offset+apicid*si->smram_cpu_state_size, + SEEK_SET) < 0) + DEBUG_MSG("sadump: cannot lseek smram cpu state in dump sub " + "header\n"); + + if (read(info->fd_memory, smram, si->smram_cpu_state_size) != + si->smram_cpu_state_size) + DEBUG_MSG("sadump: cannot read smram cpu state in dump sub " + "header\n"); + + return TRUE; +} + +#ifdef __x86__ + +static int +copy_regs_from_prstatus(struct elf_prstatus *prstatus, + const char *prstatus_buf) +{ + struct user_regs_struct *r = &prstatus->pr_reg; + const char *pr_reg_buf = prstatus_buf + OFFSET(elf_prstatus.pr_reg); + + r->bx = ULONG(pr_reg_buf + OFFSET(user_regs_struct.bx)); + r->cx = ULONG(pr_reg_buf + OFFSET(user_regs_struct.cx)); + r->dx = ULONG(pr_reg_buf + OFFSET(user_regs_struct.dx)); + r->si = ULONG(pr_reg_buf + OFFSET(user_regs_struct.si)); + r->di = ULONG(pr_reg_buf + OFFSET(user_regs_struct.di)); + r->bp = ULONG(pr_reg_buf + OFFSET(user_regs_struct.bp)); + r->ax = ULONG(pr_reg_buf + OFFSET(user_regs_struct.ax)); + r->ds = ULONG(pr_reg_buf + OFFSET(user_regs_struct.ds)); + r->es = ULONG(pr_reg_buf + OFFSET(user_regs_struct.es)); + r->fs = ULONG(pr_reg_buf + OFFSET(user_regs_struct.fs)); + r->gs = 
ULONG(pr_reg_buf + OFFSET(user_regs_struct.gs)); + r->orig_ax = ULONG(pr_reg_buf + OFFSET(user_regs_struct.orig_ax)); + r->ip = ULONG(pr_reg_buf + OFFSET(user_regs_struct.ip)); + r->cs = ULONG(pr_reg_buf + OFFSET(user_regs_struct.cs)); + r->flags = ULONG(pr_reg_buf + OFFSET(user_regs_struct.flags)); + r->sp = ULONG(pr_reg_buf + OFFSET(user_regs_struct.sp)); + r->ss = ULONG(pr_reg_buf + OFFSET(user_regs_struct.ss)); + + return TRUE; +} + +static int +copy_regs_from_smram_cpu_state(struct elf_prstatus *prstatus, + const struct sadump_smram_cpu_state *smram) +{ + struct user_regs_struct *regs = &prstatus->pr_reg; + + regs->bx = smram->RbxLower; + regs->cx = smram->RcxLower; + regs->dx = smram->RdxLower; + regs->si = smram->RsiLower; + regs->di = smram->RdiLower; + regs->bp = smram->RbpLower; + regs->ax = smram->RaxLower; + regs->ds = smram->Ds & 0xffff; + regs->es = smram->Es & 0xffff; + regs->fs = smram->Fs & 0xffff; + regs->gs = smram->Gs & 0xffff; + regs->orig_ax = smram->RaxLower; + regs->ip = (uint32_t)smram->Rip; + regs->cs = smram->Cs & 0xffff; + regs->flags = (uint32_t)smram->Rflags; + regs->sp = smram->RspLower; + regs->ss = smram->Ss & 0xffff; + + return TRUE; +} + +static void +debug_message_user_regs_struct(int cpu, struct elf_prstatus *prstatus) +{ + struct user_regs_struct *r = &prstatus->pr_reg; + + DEBUG_MSG( + "sadump: CPU: %d\n" + " BX: %08lx CX: %08lx DX: %08lx SI: %08lx\n" + " DI: %08lx BP: %08lx AX: %08lx ORIG_AX: %08lx\n" + " DS: %04lx ES: %04lx FS: %04lx GS: %04lx CS: %04lx SS: %04lx\n" + " IP: %08lx FLAGS: %04lx SP: %08lx\n", + cpu, + r->bx, r->cx, r->dx, r->si, + r->di, r->bp, r->ax, r->orig_ax, + r->ds, r->es, r->fs, r->gs, r->cs, r->ss, + r->ip, r->flags, r->sp); +} + +#elif defined(__x86_64__) + +static int +copy_regs_from_prstatus(struct elf_prstatus *prstatus, + const char *prstatus_buf) +{ + struct user_regs_struct *r = &prstatus->pr_reg; + const char *pr_reg_buf = prstatus_buf + OFFSET(elf_prstatus.pr_reg); + + r->r15 = ULONG(pr_reg_buf 
+ OFFSET(user_regs_struct.r15)); + r->r14 = ULONG(pr_reg_buf + OFFSET(user_regs_struct.r14)); + r->r13 = ULONG(pr_reg_buf + OFFSET(user_regs_struct.r13)); + r->bp = ULONG(pr_reg_buf + OFFSET(user_regs_struct.bp)); + r->bx = ULONG(pr_reg_buf + OFFSET(user_regs_struct.bx)); + r->r11 = ULONG(pr_reg_buf + OFFSET(user_regs_struct.r11)); + r->r10 = ULONG(pr_reg_buf + OFFSET(user_regs_struct.r10)); + r->r9 = ULONG(pr_reg_buf + OFFSET(user_regs_struct.r9)); + r->r8 = ULONG(pr_reg_buf + OFFSET(user_regs_struct.r8)); + r->ax = ULONG(pr_reg_buf + OFFSET(user_regs_struct.ax)); + r->cx = ULONG(pr_reg_buf + OFFSET(user_regs_struct.cx)); + r->dx = ULONG(pr_reg_buf + OFFSET(user_regs_struct.dx)); + r->si = ULONG(pr_reg_buf + OFFSET(user_regs_struct.si)); + r->di = ULONG(pr_reg_buf + OFFSET(user_regs_struct.di)); + r->orig_ax = ULONG(pr_reg_buf + OFFSET(user_regs_struct.orig_ax)); + r->ip = ULONG(pr_reg_buf + OFFSET(user_regs_struct.ip)); + r->cs = ULONG(pr_reg_buf + OFFSET(user_regs_struct.cs)); + r->flags = ULONG(pr_reg_buf + OFFSET(user_regs_struct.flags)); + r->sp = ULONG(pr_reg_buf + OFFSET(user_regs_struct.sp)); + r->ss = ULONG(pr_reg_buf + OFFSET(user_regs_struct.ss)); + r->fs_base = ULONG(pr_reg_buf + OFFSET(user_regs_struct.fs_base)); + r->gs_base = ULONG(pr_reg_buf + OFFSET(user_regs_struct.gs_base)); + r->ds = ULONG(pr_reg_buf + OFFSET(user_regs_struct.ds)); + r->es = ULONG(pr_reg_buf + OFFSET(user_regs_struct.es)); + r->fs = ULONG(pr_reg_buf + OFFSET(user_regs_struct.fs)); + r->gs = ULONG(pr_reg_buf + OFFSET(user_regs_struct.gs)); + + return TRUE; +} + +static int +copy_regs_from_smram_cpu_state(struct elf_prstatus *prstatus, + const struct sadump_smram_cpu_state *smram) +{ + struct user_regs_struct *regs = &prstatus->pr_reg; + + regs->r15 = ((uint64_t)smram->R15Upper<<32)+smram->R15Lower; + regs->r14 = ((uint64_t)smram->R14Upper<<32)+smram->R14Lower; + regs->r13 = ((uint64_t)smram->R13Upper<<32)+smram->R13Lower; + regs->r12 = 
((uint64_t)smram->R12Upper<<32)+smram->R12Lower; + regs->bp = ((uint64_t)smram->RbpUpper<<32)+smram->RbpLower; + regs->bx = ((uint64_t)smram->RbxUpper<<32)+smram->RbxLower; + regs->r11 = ((uint64_t)smram->R11Upper<<32)+smram->R11Lower; + regs->r10 = ((uint64_t)smram->R10Upper<<32)+smram->R10Lower; + regs->r9 = ((uint64_t)smram->R9Upper<<32)+smram->R9Lower; + regs->r8 = ((uint64_t)smram->R8Upper<<32)+smram->R8Lower; + regs->ax = ((uint64_t)smram->RaxUpper<<32)+smram->RaxLower; + regs->cx = ((uint64_t)smram->RcxUpper<<32)+smram->RcxLower; + regs->dx = ((uint64_t)smram->RdxUpper<<32)+smram->RdxLower; + regs->si = ((uint64_t)smram->RsiUpper<<32)+smram->RsiLower; + regs->di = ((uint64_t)smram->RdiUpper<<32)+smram->RdiLower; + regs->orig_ax = ((uint64_t)smram->RaxUpper<<32)+smram->RaxLower; + regs->ip = smram->Rip; + regs->cs = smram->Cs; + regs->flags = smram->Rflags; + regs->sp = ((uint64_t)smram->RspUpper<<32)+smram->RspLower; + regs->ss = smram->Ss; + regs->fs_base = 0; + regs->gs_base = 0; + regs->ds = smram->Ds; + regs->es = smram->Es; + regs->fs = smram->Fs; + regs->gs = smram->Gs; + + return TRUE; +} + +static void +debug_message_user_regs_struct(int cpu, struct elf_prstatus *prstatus) +{ + struct user_regs_struct *r = &prstatus->pr_reg; + + DEBUG_MSG( + "sadump: CPU: %d\n" + " R15: %016llx R14: %016llx R13: %016llx\n" + " R12: %016llx RBP: %016llx RBX: %016llx\n" + " R11: %016llx R10: %016llx R9: %016llx\n" + " R8: %016llx RAX: %016llx RCX: %016llx\n" + " RDX: %016llx RSI: %016llx RDI: %016llx\n" + " ORIG_RAX: %016llx RIP: %016llx\n" + " CS: %04lx FLAGS: %08llx RSP: %016llx\n" + " SS: %04lx FS_BASE: %04lx GS_BASE: %04lx\n" + " DS: %04lx ES: %04lx FS: %04lx GS: %04lx\n", + cpu, + (unsigned long long)r->r15, (unsigned long long)r->r14, + (unsigned long long)r->r13, (unsigned long long)r->r12, + (unsigned long long)r->bp, (unsigned long long)r->bx, + (unsigned long long)r->r11, (unsigned long long)r->r10, + (unsigned long long)r->r9, (unsigned long long)r->r8, + 
(unsigned long long)r->ax, (unsigned long long)r->cx, + (unsigned long long)r->dx, (unsigned long long)r->si, + (unsigned long long)r->di, + (unsigned long long)r->orig_ax, + (unsigned long long)r->ip, r->cs, + (unsigned long long)r->flags, (unsigned long long)r->sp, + r->ss, r->fs_base, r->gs_base, r->ds, r->es, r->fs, + r->gs); +} + +#endif /* __x86_64__ */ + +static void +debug_message_smram_cpu_state(int apicid, struct sadump_smram_cpu_state *s) +{ + DEBUG_MSG( + "sadump: APIC ID: %d\n" + " RIP: %016llx RSP: %08x%08x RBP: %08x%08x\n" + " RAX: %08x%08x RBX: %08x%08x RCX: %08x%08x\n" + " RDX: %08x%08x RSI: %08x%08x RDI: %08x%08x\n" + " R08: %08x%08x R09: %08x%08x R10: %08x%08x\n" + " R11: %08x%08x R12: %08x%08x R13: %08x%08x\n" + " R14: %08x%08x R15: %08x%08x\n" + " SMM REV: %08x SMM BASE %08x\n" + " CS : %08x DS: %08x SS: %08x ES: %08x FS: %08x\n" + " GS : %08x\n" + " CR0: %016llx CR3: %016llx CR4: %08x\n" + " GDT: %08x%08x LDT: %08x%08x IDT: %08x%08x\n" + " GDTlim: %08x LDTlim: %08x IDTlim: %08x\n" + " LDTR: %08x TR: %08x RFLAGS: %016llx\n" + " EPTP: %016llx EPTP_SETTING: %08x\n" + " DR6: %016llx DR7: %016llx\n" + " Ia32Efer: %016llx\n" + " IoMemAddr: %08x%08x IoEip: %016llx\n" + " IoMisc: %08x LdtInfo: %08x\n" + " IoInstructionRestart: %04x AutoHaltRestart: %04x\n", + apicid, + (unsigned long long)s->Rip, s->RspUpper, s->RspLower, s->RbpUpper, s->RbpLower, + s->RaxUpper, s->RaxLower, s->RbxUpper, s->RbxLower, s->RcxUpper, s->RcxLower, + s->RdxUpper, s->RdxLower, s->RsiUpper, s->RsiLower, s->RdiUpper, s->RdiLower, + s->R8Upper, s->R8Lower, s->R9Upper, s->R9Lower, s->R10Upper, s->R10Lower, + s->R11Upper, s->R11Lower, s->R12Upper, s->R12Lower, s->R13Upper, s->R13Lower, + s->R14Upper, s->R14Lower, s->R15Upper, s->R15Lower, + s->SmmRevisionId, s->Smbase, + s->Cs, s->Ds, s->Ss, s->Es, s->Fs, s->Gs, + (unsigned long long)s->Cr0, (unsigned long long)s->Cr3, s->Cr4, + s->GdtUpper, s->GdtLower, s->LdtUpper, s->LdtLower, s->IdtUpper, s->IdtLower, + s->GdtLimit, 
s->LdtLimit, s->IdtLimit, + s->Ldtr, s->Tr, (unsigned long long)s->Rflags, + (unsigned long long)s->Eptp, s->EptpSetting, + (unsigned long long)s->Dr6, (unsigned long long)s->Dr7, + (unsigned long long)s->Ia32Efer, + s->IoMemAddrUpper, s->IoMemAddrLower, (unsigned long long)s->IoEip, + s->IoMisc, s->LdtInfo, + s->IoInstructionRestart, + s->AutoHaltRestart); +} + +static int +get_registers(int cpu, struct elf_prstatus *prstatus) +{ + struct sadump_smram_cpu_state smram; + char *prstatus_buf = NULL; + int retval = FALSE, apicid = 0; + + if (!(prstatus_buf = malloc(SIZE(elf_prstatus)))) { + ERRMSG("Can't allocate elf_prstatus buffer. %s\n", + strerror(errno)); + goto error; + } + + if (get_prstatus_from_crash_notes(cpu, prstatus_buf)) { + + if (!copy_regs_from_prstatus(prstatus, prstatus_buf)) + goto cleanup; + + DEBUG_MSG("sadump: cpu #%d registers from crash_notes\n", cpu); + + debug_message_user_regs_struct(cpu, prstatus); + + } else { + + if (!cpu_to_apicid(cpu, &apicid)) + goto cleanup; + + if (!get_smram_cpu_state(apicid, &smram)) + goto cleanup; + + copy_regs_from_smram_cpu_state(prstatus, &smram); + + DEBUG_MSG("sadump: cpu #%d registers from SMRAM\n", cpu); + + debug_message_smram_cpu_state(apicid, &smram); + debug_message_user_regs_struct(cpu, prstatus); + + } + + retval = TRUE; +cleanup: + free(prstatus_buf); +error: + return retval; +} + +int +sadump_add_diskset_info(char *name_memory) +{ + si->num_disks++; + + si->diskset_info = + realloc(si->diskset_info, + si->num_disks*sizeof(struct sadump_diskset_info)); + if (!si->diskset_info) { + ERRMSG("Can't allocate memory for sadump_diskset_info. 
%s\n", + strerror(errno)); + return FALSE; + } + + si->diskset_info[si->num_disks - 1].name_memory = name_memory; + si->diskset_info[si->num_disks - 1].fd_memory = -1; + + return TRUE; +} + +int +sadump_read_elf_note(char *buf, size_t size_note) +{ + if (!si->file_elf_note) + return FALSE; + + rewind(si->file_elf_note); + + if (fread(buf, size_note, 1, si->file_elf_note) != 1) { + ERRMSG("Can't read elf note file. %s\n", + strerror(errno)); + return FALSE; + } + + return TRUE; +} + +long +sadump_page_size(void) +{ + return si->sh_memory->block_size; +} + +char * +sadump_head_disk_name_memory(void) +{ + return si->diskset_info[0].name_memory; +} + +char * +sadump_format_type_name(void) +{ + switch (info->flag_sadump) { + case SADUMP_SINGLE_PARTITION: + return "single partition"; + case SADUMP_DISKSET: + return "diskset"; + case SADUMP_MEDIA_BACKUP: + return "media backup"; + case SADUMP_UNKNOWN: + return "unknown"; + } + return NULL; +} + +void +free_sadump_info(void) +{ + if (si->sph_memory) + free(si->sph_memory); + if (si->sh_memory) + free(si->sh_memory); + if (si->sdh_memory) + free(si->sdh_memory); + if (si->smh_memory) + free(si->smh_memory); + if (si->diskset_info) { + int i; + + for (i = 1; i < si->num_disks; ++i) { + if (si->diskset_info[i].fd_memory >= 0) + close(si->diskset_info[i].fd_memory); + if (si->diskset_info[i].sph_memory) + free(si->diskset_info[i].sph_memory); + } + free(si->diskset_info); + } + if (si->__per_cpu_offset) + free(si->__per_cpu_offset); + if (si->block_table) + free(si->block_table); + if (si->file_elf_note) + fclose(si->file_elf_note); + if (si->cpu_online_mask_buf) + free(si->cpu_online_mask_buf); + if (si->ram_bitmap) { + if (si->ram_bitmap->buf) + free(si->ram_bitmap->buf); + free(si->ram_bitmap); + } +} + +void +sadump_kdump_backup_region_init(void) +{ + unsigned char buf[BUFSIZE]; + unsigned long i, total, kexec_crash_image_p, elfcorehdr_p; + Elf64_Off e_phoff; + uint16_t e_phnum, e_phentsize; + unsigned long long 
/*
 * Detect whether kdump relocated ("backed up") the low memory of the
 * 1st kernel and, if so, record where the copy lives so that reads of
 * that physical range can be redirected.
 *
 * Walks kexec_crash_image->segment[] to locate the elfcorehdr segment,
 * then scans its PT_LOAD headers for the backup-area entry.  On
 * success sets si->kdump_backed_up plus the backup_src_start/size and
 * backup_offset fields; on any failure it simply returns, leaving the
 * defaults in place (best-effort initialization).
 */
void
sadump_kdump_backup_region_init(void)
{
	unsigned char buf[BUFSIZE];
	unsigned long i, total, kexec_crash_image_p, elfcorehdr_p;
	Elf64_Off e_phoff;
	uint16_t e_phnum, e_phentsize;
	unsigned long long backup_offset;
	unsigned long backup_src_start, backup_src_size;
	size_t bufsize;

	if (!readmem(VADDR, SYMBOL(kexec_crash_image), &kexec_crash_image_p,
		     sizeof(unsigned long))) {
		ERRMSG("Can't read kexec_crash_image pointer. %s\n",
		       strerror(errno));
		return;
	}

	/* A NULL image pointer means no crash kernel was loaded. */
	if (!kexec_crash_image_p) {
		DEBUG_MSG("sadump: kexec crash image was not loaded\n");
		return;
	}

	if (!readmem(VADDR, kexec_crash_image_p+OFFSET(kimage.segment),
		     buf, SIZE(kexec_segment)*ARRAY_LENGTH(kimage.segment))) {
		ERRMSG("Can't read kexec_crash_image->segment. %s\n",
		       strerror(errno));
		return;
	}

	/*
	 * Find the segment whose first bytes carry the ELF magic: that
	 * is the elfcorehdr describing the crashed kernel's memory.
	 */
	elfcorehdr_p = 0;
	for (i = 0; i < ARRAY_LENGTH(kimage.segment); ++i) {
		char e_ident[EI_NIDENT];
		unsigned mem;

		mem=ULONG(buf+i*SIZE(kexec_segment)+OFFSET(kexec_segment.mem));
		if (!mem)
			continue;

		if (!readmem(PADDR, mem, e_ident, SELFMAG)) {
			DEBUG_MSG("sadump: failed to read elfcorehdr buffer\n");
			return;
		}

		if (strncmp(ELFMAG, e_ident, SELFMAG) == 0) {
			elfcorehdr_p = mem;
			break;
		}
	}
	if (!elfcorehdr_p) {
		DEBUG_MSG("sadump: kexec_crash_image contains no elfcorehdr "
			  "segment\n");
		return;
	}

	if (!readmem(PADDR, elfcorehdr_p, buf, SIZE(elf64_hdr))) {
		ERRMSG("Can't read elfcorehdr ELF header. %s\n",
		       strerror(errno));
		return;
	}

	e_phnum = USHORT(buf + OFFSET(elf64_hdr.e_phnum));
	e_phentsize = USHORT(buf + OFFSET(elf64_hdr.e_phentsize));
	e_phoff = ULONG(buf + OFFSET(elf64_hdr.e_phoff));

	/*
	 * Look for the PT_LOAD entry describing the backup region:
	 * a load segment inside the low-memory backup source window
	 * whose file offset lies beyond the region itself (i.e. the
	 * data was copied elsewhere).  NOTE(review): the
	 * p_paddr + p_memsz <= p_offset test appears to be the
	 * distinguishing heuristic — confirm against kexec-tools'
	 * backup-region layout.
	 */
	backup_src_start = backup_src_size = backup_offset = 0;
	for (i = 0; i < e_phnum; ++i) {
		unsigned long p_type, p_offset, p_paddr, p_memsz;

		if (!readmem(PADDR, elfcorehdr_p+e_phoff+i*e_phentsize, buf,
			     e_phentsize)) {
			ERRMSG("Can't read elfcorehdr program header. %s\n",
			       strerror(errno));
			return;
		}

		p_type = UINT(buf + OFFSET(elf64_phdr.p_type));
		p_offset = ULONG(buf + OFFSET(elf64_phdr.p_offset));
		p_paddr = ULONG(buf + OFFSET(elf64_phdr.p_paddr));
		p_memsz = ULONG(buf + OFFSET(elf64_phdr.p_memsz));

		if (p_type == PT_LOAD &&
		    p_paddr <= KEXEC_BACKUP_SRC_END &&
		    p_paddr + p_memsz <= p_offset) {

			backup_src_start = p_paddr;
			backup_src_size = p_memsz;
			backup_offset = p_offset;

DEBUG_MSG("sadump: SRC_START: %#016lx SRC_SIZE: %#016lx SRC_OFFSET: %#016llx\n",
	  backup_src_start, backup_src_size, backup_offset);

			break;
		}
	}
	if (i == e_phnum) {
DEBUG_MSG("sadump: No PT_LOAD in elfcorehdr for backup area\n");
		return;
	}

	/*
	 * Only mark the region as backed up if it contains non-zero
	 * data; otherwise kdump never actually wrote the copy.
	 */
	bufsize = BUFSIZE;
	for (total = 0; total < backup_src_size; total += bufsize) {

		if (backup_src_size - total < BUFSIZE)
			bufsize = backup_src_size - total;

		if (!readmem(PADDR, backup_offset + total, buf, bufsize)) {
			ERRMSG("Can't read backup region. %s\n",
			       strerror(errno));
			return;
		}

		/*
		 * We're assuming that the backup region is full of 0
		 * before kdump saves the first 640kB memory of the
		 * 1st kernel in the region.
		 */
		if (!is_zero_page(buf, bufsize)) {

			si->kdump_backed_up = TRUE;
			si->backup_src_start = backup_src_start;
			si->backup_src_size = backup_src_size;
			si->backup_offset = backup_offset;

			DEBUG_MSG("sadump: kdump backup region used\n");

			return;
		}
	}

	DEBUG_MSG("sadump: kdump backup region unused\n");
}
/*
 * Stub for architectures without sadump support: mark the format as
 * unknown (presumably tested by callers via info->flag_sadump — TODO
 * confirm) and return TRUE so generic probing continues with other
 * dump formats.
 */
static inline int check_and_get_sadump_header_info(char *filename)
{
	info->flag_sadump = SADUMP_UNKNOWN;

	DEBUG_MSG("sadump: unsupported architecture\n");

	return TRUE;
}
+sadump_generate_vmcoreinfo_from_vmlinux(size_t *vmcoreinfo_size) +{ + return FALSE; +} + +static inline int sadump_generate_elf_note_from_dumpfile(void) +{ + return FALSE; +} + +static inline int sadump_add_diskset_info(char *name_memory) +{ + return TRUE; +} + +static inline int sadump_read_elf_note(char *buf, size_t size_note) +{ + return FALSE; +} + +static inline long sadump_page_size(void) +{ + return 0; +} + +static inline char * +sadump_head_disk_name_memory(void) +{ + return NULL; +} + +static inline char *sadump_format_type_name(void) +{ + return NULL; +} + +static inline void free_sadump_info(void) +{ + return; +} + +static inline int sadump_is_supported_arch(void) +{ + return FALSE; +} + +static inline void sadump_kdump_backup_region_init(void) +{ + return; +} + +#endif + +#endif /* _SADUMP_INFO_H */ diff --git a/sadump_mod.h b/sadump_mod.h new file mode 100644 index 0000000..0dd5bb5 --- /dev/null +++ b/sadump_mod.h @@ -0,0 +1,194 @@ +/* + * sadump_mod.h + * + * Created by: HATAYAMA, Daisuke <d.hatayama@jp.fujitsu.com> + * + * Copyright (C) 2011 FUJITSU LIMITED + * Copyright (C) 2011 NEC Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef _SADUMP_MOD_H +#define _SADUMP_MOD_H + +#if defined(__x86__) || defined(__x86_64__) + +#include <stdint.h> + +typedef struct efi_time { + uint16_t year; + uint8_t month; + uint8_t day; + uint8_t hour; + uint8_t minute; + uint8_t second; + uint8_t pad1; + uint32_t nanosecond; +#define EFI_UNSPECIFIED_TIMEZONE 2047 + int16_t timezone; + uint8_t daylight; + uint8_t pad2; +} efi_time_t; + +typedef struct { + uint32_t data1; + uint16_t data2; + uint16_t data3; + uint8_t data4[8]; +} efi_guid_t; + +struct sadump_part_header { +#define SADUMP_SIGNATURE1 0x75646173 +#define SADUMP_SIGNATURE2 0x0000706d + uint32_t signature1; /* sadu */ + uint32_t signature2; /* mp\0\0 */ + uint32_t enable; /* set sadump service */ + uint32_t reboot; /* number of seconds until reboot. 1-3600 */ + uint32_t compress; /* memory image format. */ + uint32_t recycle; /* dump device recycle */ + uint32_t label[16]; /* reserve */ + efi_guid_t sadump_id; /* system UUID */ + efi_guid_t disk_set_id; /* disk set UUID */ + efi_guid_t vol_id; /* device UUID */ + efi_time_t time_stamp; /* time stamp */ + uint32_t set_disk_set; /* device type */ +#define SADUMP_MAX_DISK_SET_NUM 16 + uint32_t reserve; /* Padding for Alignment */ + uint64_t used_device; /* used device */ +#define DUMP_PART_HEADER_MAGICNUM_SIZE 982 + uint32_t magicnum[DUMP_PART_HEADER_MAGICNUM_SIZE]; /* magic number */ +}; + +struct sadump_volume_info { + efi_guid_t id; /* volume id */ + uint64_t vol_size; /* device size */ + uint32_t status; /* device status */ + uint32_t cache_size; /* cache size */ +}; + +struct sadump_disk_set_header { + uint32_t disk_set_header_size; /* disk set header size */ + uint32_t disk_num; /* disk number */ + uint64_t disk_set_size; /* disk set size */ +#define DUMP_DEVICE_MAX 16 + struct sadump_volume_info vol_info[DUMP_DEVICE_MAX - 1]; + /* struct VOL_INFO array */ +}; + +struct sadump_header { +#define SADUMP_SIGNATURE "sadump\0\0" + char signature[8]; /* = "sadump\0\0" */ + uint32_t 
header_version; /* Dump header version */ + uint32_t reserve; /* Padding for Alignment */ + efi_time_t timestamp; /* Time stamp */ + uint32_t status; /* Above flags */ + uint32_t compress; /* Above flags */ + uint32_t block_size; /* Size of a block in byte */ +#define SADUMP_DEFAULT_BLOCK_SIZE 4096 + uint32_t extra_hdr_size; /* Size of host dependent + * header in blocks (reserve) + */ + uint32_t sub_hdr_size; /* Size of arch dependent header in blocks */ + uint32_t bitmap_blocks; /* Size of Memory bitmap in block */ + uint32_t dumpable_bitmap_blocks; /* Size of Memory bitmap in block */ + uint32_t max_mapnr; /* = max_mapnr */ + uint32_t total_ram_blocks; /* Size of Memory in block */ + uint32_t device_blocks; /* Number of total blocks in the dump device */ + uint32_t written_blocks; /* Number of written blocks */ + uint32_t current_cpu; /* CPU# which handles dump */ + uint32_t nr_cpus; /* Number of CPUs */ + /* + * The members from below are supported in header version 1 + * and later. 
/*
 * Per-CPU register state as saved by firmware in the SMRAM state-save
 * area and recorded by sadump.  64-bit general-purpose registers are
 * split into 32-bit halves (<reg>Lower/<reg>Upper); see
 * copy_regs_from_smram_cpu_state(), which recombines them.  The
 * ReservedN arrays pad over parts of the save map this tool does not
 * interpret.  NOTE(review): field offsets are assumed to match the
 * vendor's SMRAM state-save layout — confirm against the firmware spec.
 */
struct sadump_smram_cpu_state {
	uint64_t Reserved1[58];
	uint32_t GdtUpper, LdtUpper, IdtUpper;	/* descriptor-table base, high halves */
	uint32_t Reserved2[3];
	uint64_t IoEip;
	uint64_t Reserved3[10];
	uint32_t Cr4;
	uint32_t Reserved4[18];
	uint32_t GdtLower;
	uint32_t GdtLimit;
	uint32_t IdtLower;
	uint32_t IdtLimit;
	uint32_t LdtLower;
	uint32_t LdtLimit;
	uint32_t LdtInfo;
	uint64_t Reserved5[6];
	uint64_t Eptp;
	uint32_t EptpSetting;
	uint32_t Reserved6[5];
	uint32_t Smbase;
	uint32_t SmmRevisionId;
	uint16_t IoInstructionRestart;
	uint16_t AutoHaltRestart;
	uint32_t Reserved7[6];
	/* General-purpose registers, each as Lower/Upper 32-bit pairs. */
	uint32_t R15Lower, R15Upper, R14Lower, R14Upper;
	uint32_t R13Lower, R13Upper, R12Lower, R12Upper;
	uint32_t R11Lower, R11Upper, R10Lower, R10Upper;
	uint32_t R9Lower, R9Upper, R8Lower, R8Upper;
	uint32_t RaxLower, RaxUpper, RcxLower, RcxUpper;
	uint32_t RdxLower, RdxUpper, RbxLower, RbxUpper;
	uint32_t RspLower, RspUpper, RbpLower, RbpUpper;
	uint32_t RsiLower, RsiUpper, RdiLower, RdiUpper;
	uint32_t IoMemAddrLower, IoMemAddrUpper;
	uint32_t IoMisc, Es, Cs, Ss, Ds, Fs, Gs;	/* segment selectors */
	uint32_t Ldtr, Tr;
	uint64_t Dr7, Dr6, Rip, Ia32Efer, Rflags;
	uint64_t Cr3, Cr0;
};
defined(__x86__) || defined(__x86_64__) */ + +/* + * Type of sadump related formats + */ +enum sadump_format_type { + SADUMP_UNKNOWN = 0, + SADUMP_SINGLE_PARTITION, + SADUMP_DISKSET, + SADUMP_MEDIA_BACKUP +}; + +#endif /* _SADUMP_MOD_H */ @@ -0,0 +1,766 @@ +/* tools.c - Borrowed from crash utility code + * (https://github.com/crash-utility/crash) + * + * Copyright (C) 1999, 2000, 2001, 2002 Mission Critical Linux, Inc. + * Copyright (C) 2002-2017 David Anderson + * Copyright (C) 2002-2018 Red Hat, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "common.h" +#include "makedumpfile.h" +#include <ctype.h> + +#define FAULT_ON_ERROR (0x1) +#define RETURN_ON_ERROR (0x2) +#define QUIET (0x4) +#define HEX_BIAS (0x8) +#define LONG_LONG (0x10) +#define RETURN_PARTIAL (0x20) +#define NO_DEVMEM_SWITCH (0x40) + +#define MAX_HEXADDR_STRLEN (16) + +#define FIRSTCHAR(s) (s[0]) + +/* + * Determine whether a file exists, using the caller's stat structure if + * one was passed in. + */ +int +file_exists(char *file) +{ + struct stat sbuf; + + if (stat(file, &sbuf) == 0) + return TRUE; + + return FALSE; +} + +/* + * Parse a line into tokens, populate the passed-in argv[] array, and + * return the count of arguments found. This function modifies the + * passed-string by inserting a NULL character at the end of each token. + * Expressions encompassed by parentheses, and strings encompassed by + * apostrophes, are collected into single tokens. 
/*
 * Parse a line into tokens, populate the passed-in argv[] array, and
 * return the count of arguments found.  This function modifies the
 * passed-in string by writing NUL characters over token delimiters,
 * so argv[] entries point into str itself.  Double-quoted strings and
 * parenthesized expressions are collected into single tokens.
 * argv[] must have room for MAXARGS entries.
 */
int
parse_line(char *str, char *argv[])
{
	int i, j, k;		/* i: scan index, j: argv slot, k: token start */
	int string;		/* currently inside a quoted string token */
	int expression;		/* currently inside a (...) token */

	for (i = 0; i < MAXARGS; i++)
		argv[i] = NULL;

	clean_line(str);

	if (str == NULL || strlen(str) == 0)
		return(0);

	i = j = k = 0;
	string = expression = FALSE;

	/*
	 * Special handling for when the first character is a '"'.
	 */
	if (str[0] == '"') {
next:
		/* Advance to the closing quote (or end of string). */
		do {
			i++;
		} while ((str[i] != NULLCHAR) && (str[i] != '"'));

		switch (str[i])
		{
		case NULLCHAR:
			/* Unterminated quote: take the rest as one token. */
			argv[j] = &str[k];
			return j+1;
		case '"':
			argv[j++] = &str[k+1];
			str[i++] = NULLCHAR;
			if (str[i] == '"') {
				k = i;
				goto next;
			}
			break;
		}
	} else
		argv[j++] = str;

	while (TRUE) {
		if (j == MAXARGS)
			ERRMSG("too many arguments in string!\n");

		/* Skip over the current token. */
		while (str[i] != ' ' && str[i] != '\t' && str[i] != NULLCHAR) {
			i++;
		}

		switch (str[i])
		{
		case ' ':
		case '\t':
			str[i++] = NULLCHAR;	/* terminate previous token */

			while (str[i] == ' ' || str[i] == '\t') {
				i++;
			}

			if (str[i] == '"') {
				str[i] = ' ';
				string = TRUE;
				i++;
			}

			if (!string && str[i] == '(') {
				expression = TRUE;
			}

			if (str[i] != NULLCHAR && str[i] != '\n') {
				argv[j++] = &str[i];
				if (string) {
					string = FALSE;
					while (str[i] != '"' && str[i] != NULLCHAR)
						i++;
					if (str[i] == '"')
						str[i] = ' ';
				}
				if (expression) {
					expression = FALSE;
					while (str[i] != ')' && str[i] != NULLCHAR)
						i++;
				}
				break;
			}
			/* else fall through */
		case '\n':
			str[i] = NULLCHAR;
			/* keep falling... */
		case NULLCHAR:
			/* NULLCHAR ('\0', i.e. 0) used as a null pointer. */
			argv[j] = NULLCHAR;
			return(j);
		}
	}
}
/*
 * Strip line-beginning whitespace.
 *
 * Removes leading spaces and tabs from "line" in place and returns it.
 * Rewritten to shift the string with memmove() instead of round-tripping
 * it through a fixed-size stack buffer: the old strcpy() into
 * char buf[BUFSIZE] overflowed the stack for lines of BUFSIZE bytes or
 * more, and copied the whole line twice even when nothing was stripped.
 */
char *
strip_beginning_whitespace(char *line)
{
	char *p;

	if (line == NULL || strlen(line) == 0)
		return(line);

	p = line;
	while (*p == ' ' || *p == '\t')
		p++;
	if (p != line)
		memmove(line, p, strlen(p) + 1);	/* +1 keeps the NUL */

	return(line);
}
/*
 * Find the rightmost instance of a substring in a string.
 * Returns a pointer to the last occurrence of "lookfor" within "s",
 * or NULL if it does not occur.  Overlapping occurrences count: the
 * search resumes one character past each hit.
 */
char *
strstr_rightmost(char *s, char *lookfor)
{
	char *hit = NULL;
	char *cursor = s;

	while (*cursor) {
		char *found = strstr(cursor, lookfor);

		if (found == NULL)
			break;
		hit = found;
		cursor = found + 1;
	}

	return hit;
}
/*
 * Prints a string verbatim, allowing strings with % signs to be
 * displayed without printf conversions.
 *
 * filep: open output stream; line: NUL-terminated string (must not be
 * NULL).  The stream is flushed before returning.
 *
 * Rewritten to walk to the terminating NUL instead of calling strlen()
 * in the loop condition (which made the old version quadratic and
 * compared a signed int index against size_t), and to flush once at
 * the end rather than after every single character.
 */
void
print_verbatim(FILE *filep, char *line)
{
	const char *p;

	for (p = line; *p; p++)
		fputc(*p, filep);
	fflush(filep);
}
/*
 * Convert a string to a hexadecimal long value.
 *
 * Accepts an optional "0x"/"0X" prefix and at most MAX_HEXADDR_STRLEN
 * digits after it.  Returns the converted value, or BADADDR on NULL
 * input, oversized input, or any non-hex character.  Passing QUIET in
 * flags suppresses the error messages.
 */
ulong
htol(char *s, int flags)
{
	ulong i, j;	/* i: scan index, j: value of current digit */
	ulong n;	/* accumulated result */

	if (s == NULL) {
		if (!(flags & QUIET))
			ERRMSG("received NULL string\n");
		goto htol_error;
	}

	if (STRNEQ(s, "0x") || STRNEQ(s, "0X"))
		s += 2;

	if (strlen(s) > MAX_HEXADDR_STRLEN) {
		if (!(flags & QUIET))
			ERRMSG("input string too large: \"%s\" (%d vs %d)\n",
			       s, (int)strlen(s), (int)MAX_HEXADDR_STRLEN);
		goto htol_error;
	}

	for (n = i = 0; s[i] != 0; i++) {
		switch (s[i])
		{
		case 'a':
		case 'b':
		case 'c':
		case 'd':
		case 'e':
		case 'f':
			j = (s[i] - 'a') + 10;
			break;
		case 'A':
		case 'B':
		case 'C':
		case 'D':
		case 'E':
		case 'F':
			j = (s[i] - 'A') + 10;
			break;
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
		case '0':
			j = s[i] - '0';
			break;
		case 'x':
		case 'X':
		case 'h':
			/*
			 * Skipped anywhere in the string — presumably to
			 * tolerate embedded "0x" and trailing assembler-style
			 * 'h' suffixes; TODO confirm intent upstream.
			 */
			continue;
		default:
			if (!(flags & QUIET))
				ERRMSG("invalid input: \"%s\"\n", s);
			goto htol_error;
		}
		n = (16 * n) + j;
	}

	return(n);

htol_error:
	return BADADDR;
}
+ * and cannot be construed as a decimal number. + * If count is non-zero, limit the search to count characters. + */ +int +hexadecimal_only(char *s, int count) +{ + char *p; + int cnt, only; + + if (!count) { + strip_line_end(s); + cnt = 0; + } else + cnt = count; + + only = 0; + + for (p = &s[0]; *p; p++) { + switch(*p) + { + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'x': + case 'X': + only++; + break; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '0': + break; + + case ' ': + if (*(p+1) == NULLCHAR) + break; + else + return FALSE; + default: + return FALSE; + } + + if (count && (--cnt == 0)) + break; + } + + return only; +} |