From 077b977ce5a780c6b43ceff26880fa84212f3709 Mon Sep 17 00:00:00 2001 From: Dodji Seketeli Date: Wed, 31 May 2017 10:30:23 +0200 Subject: [PATCH] Allow re-using the ELF/DWARF read_context when loading a corpus group Right now, when loading each corpus of a group, a new read_context is created and destroyed. That means thousands of read_context objects are created and destroyed. Profiling seems to argue that we'd gain in performance by re-using the first read_context that was created instead, and re-setting it before loading each new corpus. This is what this patch does. * include/abg-dwarf-reader.h (reset_read_context): Declare new function. * src/abg-dwarf-reader.cc (read_context::elf_path_): Make this non-const. (read_context::initialize): New function to initialize all data members. (read_context::read_context): Use the new read_context::initialize function, rather than initializing data members 'inline' here. (reset_read_context): Define a new function to reset a read_context so that it can be re-used to load a new corpus. * src/abg-tools-utils.cc (build_corpus_group_from_kernel_dist_under): Re-use the read_context created for vmlinux, resetting it with reset_read_context before loading each module, rather than creating a new read_context per module.
Signed-off-by: Dodji Seketeli --- include/abg-dwarf-reader.h | 8 ++ src/abg-dwarf-reader.cc | 209 ++++++++++++++++++++++++++++++++----- src/abg-tools-utils.cc | 20 ++-- 3 files changed, 198 insertions(+), 39 deletions(-) diff --git a/include/abg-dwarf-reader.h b/include/abg-dwarf-reader.h index 93ae3acc..14b82e56 100644 --- a/include/abg-dwarf-reader.h +++ b/include/abg-dwarf-reader.h @@ -100,6 +100,14 @@ create_read_context(const std::string& elf_path, bool read_all_types = false, bool linux_kernel_mode = false); +void +reset_read_context(read_context_sptr &ctxt, + const std::string& elf_path, + char** debug_info_root_path, + ir::environment* environment, + bool read_all_types = false, + bool linux_kernel_mode = false); + void add_read_context_suppressions(read_context& ctxt, const suppr::suppressions_type& supprs); diff --git a/src/abg-dwarf-reader.cc b/src/abg-dwarf-reader.cc index 0ed3dd8f..f384332f 100644 --- a/src/abg-dwarf-reader.cc +++ b/src/abg-dwarf-reader.cc @@ -2618,6 +2618,10 @@ struct dwarf_expr_eval_context /// This context is to be created by create_read_context(). It's then /// passed to all the routines that read specific dwarf bits as they /// get some important data from it. +/// +/// When a new data member is added to this context, it must be +/// initialized by the read_context::initialize() function. So please +/// do not forget. class read_context { public: @@ -2758,7 +2762,7 @@ public: // The address range of the offline elf file we are looking at. Dwfl_Module* elf_module_; mutable Elf* elf_handle_; - const string elf_path_; + string elf_path_; mutable Elf_Scn* bss_section_; mutable Elf_Scn* text_section_; mutable Elf_Scn* rodata_section_; @@ -2896,43 +2900,138 @@ public: /// reachable from publicly exported declarations (of functions and /// variables) are read. If set to true then all types found in the /// debug information are loaded. 
+ /// + /// @param linux_kernel_mode if set to true, then consider the special + /// linux kernel symbol tables when determining if a symbol is + /// exported or not. read_context(const string& elf_path, char** debug_info_root_path, ir::environment* environment, bool load_all_types, bool linux_kernel_mode) - : dwarf_version_(), - handle_(), - dwarf_(), - alt_fd_(), - alt_dwarf_(), - elf_module_(), - elf_handle_(), - elf_path_(elf_path), - bss_section_(), - text_section_(), - rodata_section_(), - data_section_(), - data1_section_(), - symtab_section_(), - opd_section_(), - ksymtab_section_(), - ksymtab_gpl_section_(), - versym_section_(), - verdef_section_(), - verneed_section_(), - symbol_versionning_sections_loaded_(), - symbol_versionning_sections_found_(), - cur_tu_die_(), - exported_decls_builder_() { + initialize(elf_path, debug_info_root_path, environment, + load_all_types, linux_kernel_mode); + } + + /// Initializer of read_context. + /// + /// @param elf_path the path to the elf file the context is to be + /// used for. + /// + /// @param debug_info_root_path a pointer to the path to the root + /// directory under which the debug info is to be found for @p + /// elf_path. Leave this to NULL if the debug info is not in a + /// split file. + /// + /// @param environment the environment used by the current context. + /// This environment contains resources needed by the reader and by + /// the types and declarations that are to be created later. Note + /// that ABI artifacts that are to be compared all need to be + /// created within the same environment. + /// + /// Please also note that the life time of this environment object + /// must be greater than the life time of the resulting @ref + /// read_context, as the context uses resources that are allocated in + /// the environment. + /// + /// @param load_all_types if set to false only the types that are + /// reachable from publicly exported declarations (of functions and + /// variables) are read. 
If set to true then all types found in the + /// debug information are loaded. + /// + /// @param linux_kernel_mode if set to true, then consider the + /// special linux kernel symbol tables when determining if a symbol + /// is exported or not. + void + initialize(const string& elf_path, + char** debug_info_root_path, + ir::environment* environment, + bool load_all_types, + bool linux_kernel_mode) + { + dwarf_version_ = 0; + dwarf_ = 0; + handle_.reset(); + alt_fd_ = 0; + alt_dwarf_ = 0; + elf_module_ = 0; + elf_handle_ = 0; + elf_path_ = elf_path; + bss_section_ = 0; + text_section_ = 0; + rodata_section_ = 0; + data_section_ = 0; + data1_section_ = 0; + symtab_section_ = 0; + opd_section_ = 0; + ksymtab_section_ = 0; + ksymtab_gpl_section_ = 0; + versym_section_ = 0; + verdef_section_ = 0; + verneed_section_ = 0; + symbol_versionning_sections_loaded_ = 0; + symbol_versionning_sections_found_ = 0; + cur_tu_die_ = 0; + exported_decls_builder_ = 0; + + clear_alt_debug_info_data(); + + supprs_.clear(); + decl_die_repr_die_offsets_maps_.clear(); + type_die_repr_die_offsets_maps_.clear(); + die_qualified_name_maps_.clear(); + die_pretty_repr_maps_.clear(); + die_pretty_type_repr_maps_.clear(); + decl_die_artefact_maps_.clear(); + type_die_artefact_maps_.clear(); + canonical_type_die_vecs_.clear(); + canonical_decl_die_vecs_.clear(); + die_wip_classes_map_.clear(); + alternate_die_wip_classes_map_.clear(); + type_unit_die_wip_classes_map_.clear(); + die_wip_function_types_map_.clear(); + alternate_die_wip_function_types_map_.clear(); + type_unit_die_wip_function_types_map_.clear(); + die_function_with_no_symbol_map_.clear(); + types_to_canonicalize_.clear(); + alt_types_to_canonicalize_.clear(); + type_unit_types_to_canonicalize_.clear(); + decl_only_classes_map_.clear(); + die_tu_map_.clear(); + cur_corpus_group_.reset(); + cur_corpus_.reset(); + cur_tu_.reset(); + primary_die_parent_map_.clear(); + tu_die_imported_unit_points_map_.clear(); + 
alt_tu_die_imported_unit_points_map_.clear(); + type_units_tu_die_imported_unit_points_map_.clear(); + alternate_die_parent_map_.clear(); + type_section_die_parent_map_.clear(); + var_decls_to_add_.clear(); + fun_addr_sym_map_.reset(); + fun_entry_addr_sym_map_.reset(); + fun_syms_.reset(); + var_addr_sym_map_.reset(); + var_syms_.reset(); + undefined_fun_syms_.reset(); + undefined_var_syms_.reset(); + linux_exported_fn_syms_.reset(); + linux_exported_var_syms_.reset(); + linux_exported_gpl_fn_syms_.reset(); + linux_exported_gpl_var_syms_.reset(); + dt_needed_.clear(); + dt_soname_.clear(); + elf_architecture_.clear(); + + clear_per_translation_unit_data(); + memset(&offline_callbacks_, 0, sizeof(offline_callbacks_)); create_default_dwfl(debug_info_root_path); options_.env = environment; options_.load_in_linux_kernel_mode = linux_kernel_mode; options_.load_all_types = load_all_types; load_in_linux_kernel_mode(linux_kernel_mode); - env(environment); } /// Clear the resources related to the alternate DWARF data. @@ -15305,9 +15404,10 @@ operator&=(status& l, status r) /// /// @param elf_path the path to the elf file the context is to be used for. /// -/// @param a pointer to the path to the root directory under which the -/// debug info is to be found for @p elf_path. Leave this to NULL if -/// the debug info is not in a split file. +/// @param debug_info_root_path a pointer to the path to the root +/// directory under which the debug info is to be found for @p +/// elf_path. Leave this to NULL if the debug info is not in a split +/// file. /// /// @param environment the environment used by the current context. /// This environment contains resources needed by the reader and by @@ -15325,6 +15425,10 @@ operator&=(status& l, status r) /// variables) are read. If set to true then all types found in the /// debug information are loaded. 
/// +/// @param linux_kernel_mode if set to true, then consider the special +/// linux kernel symbol tables when determining if a symbol is +/// exported or not. +/// /// @return a smart pointer to the resulting dwarf_reader::read_context. read_context_sptr create_read_context(const std::string& elf_path, @@ -15341,6 +15445,53 @@ create_read_context(const std::string& elf_path, return result; } +/// Re-initialize a read_context so that it can be re-used to read +/// another binary. +/// +/// @param ctxt the context to re-initialize. If it is empty, this function does nothing. +/// +/// @param elf_path the path to the elf file the context is to be used +/// for. +/// +/// @param debug_info_root_path a pointer to the path to the root +/// directory under which the debug info is to be found for @p +/// elf_path. Leave this to NULL if the debug info is not in a split +/// file. +/// +/// @param environment the environment used by the current context. +/// This environment contains resources needed by the reader and by +/// the types and declarations that are to be created later. Note +/// that ABI artifacts that are to be compared all need to be created +/// within the same environment. +/// +/// Please also note that the life time of this environment object +/// must be greater than the life time of the resulting @ref +/// read_context, as the context uses resources that are allocated in the +/// environment. +/// +/// @param read_all_types if set to false only the types that are +/// reachable from publicly exported declarations (of functions and +/// variables) are read. If set to true then all types found in the +/// debug information are loaded. +/// +/// @param linux_kernel_mode if set to true, then consider the special +/// linux kernel symbol tables when determining if a symbol is +/// exported or not. +/// +/// This function returns nothing; @p ctxt is re-initialized in place. 
+void +reset_read_context(read_context_sptr &ctxt, + const std::string& elf_path, + char** debug_info_root_path, + ir::environment* environment, + bool read_all_types, + bool linux_kernel_mode) +{ + if (ctxt) + ctxt->initialize(elf_path, debug_info_root_path, environment, + read_all_types, linux_kernel_mode); +} + /// Add suppressions specifications to the set of suppressions to be /// used during the construction of the ABI internal representation /// (the ABI corpus) from ELF and DWARF. diff --git a/src/abg-tools-utils.cc b/src/abg-tools-utils.cc index a452bdf5..a66a5fd2 100644 --- a/src/abg-tools-utils.cc +++ b/src/abg-tools-utils.cc @@ -1635,6 +1635,7 @@ build_corpus_group_from_kernel_dist_under(const string& root, if (verbose) std::cout << "DONE\n"; + dwarf_reader::read_context_sptr ctxt; if (got_binary_paths) { shared_ptr di_root = @@ -1644,8 +1645,8 @@ build_corpus_group_from_kernel_dist_under(const string& root, corpus_group_sptr group; if (!vmlinux.empty()) { - dwarf_reader::read_context_sptr ctxt = - dwarf_reader::create_read_context(vmlinux, &di_root_ptr, env.get(), + ctxt = + dwarf_reader::create_read_context(vmlinux, &di_root_ptr,env.get(), /*read_all_types=*/false, /*linux_kernel_mode=*/true); @@ -1689,24 +1690,23 @@ build_corpus_group_from_kernel_dist_under(const string& root, << "/" << total_nb_modules << ") ... " << std::flush; - dwarf_reader::read_context_sptr module_ctxt = - dwarf_reader::create_read_context(*m, &di_root_ptr, env.get(), - /*read_all_types=*/false, - /*linux_kernel_mode=*/true); + reset_read_context(ctxt, *m, &di_root_ptr, env.get(), + /*read_all_types=*/false, + /*linux_kernel_mode=*/true); // If we have been given a whitelist of functions and // variable symbols to look at, then we can avoid loading // and analyzing the ELF symbol table. 
bool do_ignore_symbol_table = !kabi_wl_paths.empty(); - set_ignore_symbol_table(*module_ctxt, do_ignore_symbol_table); + set_ignore_symbol_table(*ctxt, do_ignore_symbol_table); - load_generate_apply_suppressions(*module_ctxt, suppr_paths, + load_generate_apply_suppressions(*ctxt, suppr_paths, kabi_wl_paths, supprs); - set_read_context_corpus_group(*module_ctxt, group); + set_read_context_corpus_group(*ctxt, group); - read_and_add_corpus_to_group_from_elf(*module_ctxt, + read_and_add_corpus_to_group_from_elf(*ctxt, *group, status); if (verbose) std::cout << " DONE\n";