diff --git a/COPYING b/COPYING
new file mode 100644
index 000000000..e963df829
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,622 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 000000000..63b99f4ea
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,109 @@
+== Basic build ==
+
+1. Edit config.mk. Follow the comments there.
+ Optionally you can change compiler settings in include_[GCC|ICC].mk.
+ Please note that only the default compiler flags are supported and tested.
+2. make
+(2.a make likwid-bench (if you want to build and install likwid-bench)
+3. make install (this is required for likwid-pin and if you use the accessDaemon)
+4. setup access to the msr device files (see end of this document)
+
+Only the default flags set are tested. As it is not possible to test all
+compiler setting variants the Intel icc compiler is only build tested. A basic
+function test is done for the icc binary. The only variant fully tested is gcc
+with default compiler flags. It is therefore recommended to use gcc with the
+default flags. If you want to use and build the Fortran interface you can mix
+GCC with the Intel Fortran Compiler. More information on this can be found in
+the WIKI.
+
+*NOTICE*
+
+All generated files are located in the [GCC|ICC] build directory.
+This includes the dependency files, object files and also the
+generated source files and the pas and assembly files for likwid-bench.
+If you debug your likwid-bench benchmarks you can look at all
+intermediate build files and also the final assembly code.
+
+== Known problems ==
+
+On very old systems with old kernels (< 2.6.7) or old glibc versions likwid
+is built with reduced functionality. This includes missing support for NUMA
+and pinning.
+
+== Additional Targets ==
+
+make clean - clean the object directory
+make distclean - clean also the executables/libraries
+make uninstall - delete installed files
+
+== Build likwid-bench ==
+
+To build likwid-bench you have to explicitly call:
+
+make likwid-bench
+
+This is because likwid-bench does not compile on 32bit systems.
+
+== Build accessDaemon ==
+
+To build the accessDaemon:
+
+1. Edit config.mk and configure path in ACCESSDAEMON variable.
+2. Set the desired default ACCESSMODE. You can overwrite this on the command line.
+3. make will also build the accessDaemon
+4. Install with
+ make install
+
+With the standard make install target the daemon will also be installed in
+${PREFIX}/bin . Don't forget to copy the daemon if you configured a different
+path in ACCESSDAEMON.
+
+== Setup of msr module ==
+
+likwid-perfctr, likwid-powermeter and likwid-features require the Linux msr kernel module. This module
+is part of most standard distro kernels. You have to be root to do the initial setup.
+
+1. Check if the msr module is loaded with 'lsmod | grep msr' . There should be an output.
+2. If the module is not loaded, load it with 'modprobe msr' . For automatic loading at startup
+consult your distro's documentation on how to do so.
+3. Adjust the access rights on the msr device files for normal users. To allow everybody access you can
+use 'chmod o+rw /dev/cpu/*/msr' . This is only recommended on safe single user desktop systems.
+
+As general access to the msr registers is not desired on security sensitive
+systems, you can implement more sophisticated access rights settings
+with e.g. setgid. A common solution used on many other device files, e.g. for
+audio, is to introduce a group and make a chown on the msr device files to that
+group. Now if you execute likwid-perfctr with setgid on that group the
+executing user can use the tool but cannot directly write or read the msr
+device files.
+
+A secure solution is to use the accessDaemon, which encapsulates the access to
+the msr device files and performs an address check for allowed registers. For
+more information how to setup and use this solution have a look at the WIKI
+page:
+
+http://code.google.com/p/likwid/wiki/MSRDaemon
+
+A demo for a root exploit involving the msr device files was published. As
+a consequence the security settings for access to the msr device files are
+tightened in recent kernels.
+
+Just setting the file access rights or using suid root on the access daemon is
+not sufficient anymore. You have to register your binary now to get access.
+This is only necessary if the above setup does not work.
+
+You register the necessary capability by calling
+
+sudo setcap cap_sys_rawio+ep EXECUTABLE
+
+on the executables. This is only possible on local file systems.
+The only feasible way is to register the likwid-accessD and proxy all access over it.
+
+If you have still problems please let me know on the likwid mailing list:
+
+http://groups.google.com/group/likwid-users
+
+
+
+
+
diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000..abcdf6c37
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,242 @@
+# =======================================================================================
+#
+# Filename: Makefile
+#
+# Description: Central Makefile
+#
+# Version:
+# Released:
+#
+# Author: Jan Treibig (jt), jan.treibig@gmail.com
+# Project: likwid
+#
+# Copyright (C) 2013 Jan Treibig
+#
+# This program is free software: you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation, either version 3 of the License, or (at your option) any later
+# version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# =======================================================================================
+
+SRC_DIR = ./src
+DOC_DIR = ./doc
+GROUP_DIR = ./groups
+FILTER_DIR = ./filters
+MAKE_DIR = ./make
+EXT_TARGETS = ./ext/lua ./ext/hwloc ./src/libwid
+
+#DO NOT EDIT BELOW
+
+
+# Dependency chains:
+# *.[ch] -> *.o -> executables
+# *.ptt -> *.pas -> *.s -> *.o -> executables
+# *.txt -> *.h (generated)
+
+include ./config.mk
+include $(MAKE_DIR)/include_$(COMPILER).mk
+include $(MAKE_DIR)/config_checks.mk
+include $(MAKE_DIR)/config_defines.mk
+
+INCLUDES += -I./src/includes -I./ext/lua/includes -I./ext/hwloc/include -I$(BUILD_DIR)
+LIBS +=
+
+#CONFIGURE BUILD SYSTEM
+BUILD_DIR = ./$(COMPILER)
+Q ?= @
+GENGROUPLOCK = .gengroup
+
+ifeq ($(COMPILER),MIC)
+BENCH_DIR = ./bench/phi
+else
+ifeq ($(COMPILER),GCCX86)
+BENCH_DIR = ./bench/x86
+else
+BENCH_DIR = ./bench/x86-64
+endif
+endif
+
+ifeq ($(SHARED_LIBRARY),true)
+CFLAGS += $(SHARED_CFLAGS)
+LIBS += -L. -llikwid -lm
+DYNAMIC_TARGET_LIB := liblikwid.so
+TARGET_LIB := $(DYNAMIC_TARGET_LIB)
+else
+STATIC_TARGET_LIB := liblikwid.a
+TARGET_LIB := $(STATIC_TARGET_LIB)
+endif
+
+
+VPATH = $(SRC_DIR)
+OBJ = $(patsubst $(SRC_DIR)/%.c, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.c))
+OBJ += $(patsubst $(SRC_DIR)/%.cc, $(BUILD_DIR)/%.o,$(wildcard $(SRC_DIR)/*.cc))
+PERFMONHEADERS = $(patsubst $(SRC_DIR)/includes/%.txt, $(BUILD_DIR)/%.h,$(wildcard $(SRC_DIR)/includes/*.txt))
+OBJ_BENCH = $(patsubst $(BENCH_DIR)/%.ptt, $(BUILD_DIR)/%.o,$(wildcard $(BENCH_DIR)/*.ptt))
+OBJ_LUA = $(wildcard ./ext/lua/$(COMPILER)/*.o)
+OBJ_HWLOC = $(wildcard ./ext/hwloc/$(COMPILER)/*.o)
+OBJ_LIBWID = $(wildcard ./src/libwid/$(COMPILER)/*.o)
+
+APPS = likwid-perfctr \
+ likwid-features \
+ likwid-powermeter \
+ likwid-memsweeper \
+ likwid-topology \
+ likwid-genCfg \
+ likwid-pin \
+ likwid-bench
+
+LIBWID = libwid.a
+LIBHWLOC = ext/hwloc/libhwloc.a
+
+CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(INCLUDES)
+
+all: $(BUILD_DIR) $(GENGROUPLOCK) $(PERFMONHEADERS) $(OBJ) $(OBJ_BENCH) $(EXT_TARGETS) $(STATIC_TARGET_LIB) $(DYNAMIC_TARGET_LIB) $(APPS) $(FORTRAN_INTERFACE) $(PINLIB) $(DAEMON_TARGET)
+
+tags:
+ @echo "===> GENERATE TAGS"
+ $(Q)ctags -R
+
+$(APPS): $(addprefix $(SRC_DIR)/applications/,$(addsuffix .c,$(APPS))) $(BUILD_DIR) $(GENGROUPLOCK) $(OBJ) $(OBJ_BENCH)
+ @echo "===> LINKING $@"
+ $(Q)${CC} $(CFLAGS) $(ANSI_CFLAGS) $(CPPFLAGS) ${LFLAGS} -o $@ $(addprefix $(SRC_DIR)/applications/,$(addsuffix .c,$@)) $(OBJ_BENCH) $(TARGET_LIB) $(LIBHWLOC) $(LIBS)
+
+$(STATIC_TARGET_LIB): $(OBJ)
+ @echo "===> CREATE STATIC LIB $(STATIC_TARGET_LIB)"
+ $(Q)${AR} -cq $(STATIC_TARGET_LIB) $(OBJ) $(OBJ_HWLOC)
+
+$(LIBWID): $(OBJ_LUA) $(OBJ_HWLOC) $(OBJ_LIBWID)
+ @echo "===> CREATE STATIC LIB $(LIBWID)"
+ $(Q)${AR} -cq $(LIBWID) $(OBJ_LUA) $(OBJ_HWLOC) $(OBJ_LIBWID)
+
+
+$(DYNAMIC_TARGET_LIB): $(OBJ)
+ @echo "===> CREATE SHARED LIB $(DYNAMIC_TARGET_LIB)"
+ $(Q)${CC} $(SHARED_LFLAGS) $(SHARED_CFLAGS) -o $(DYNAMIC_TARGET_LIB) $(OBJ) $(OBJ_HWLOC)
+
+$(DAEMON_TARGET): $(SRC_DIR)/access-daemon/accessDaemon.c
+ @echo "===> Build access daemon likwid-accessD"
+ $(Q)$(MAKE) -C $(SRC_DIR)/access-daemon
+
+$(BUILD_DIR):
+ @mkdir $(BUILD_DIR)
+
+$(PINLIB):
+ @echo "===> CREATE LIB $(PINLIB)"
+ $(Q)$(MAKE) -s -C src/pthread-overload/ $(PINLIB)
+
+$(GENGROUPLOCK): $(foreach directory,$(shell ls $(GROUP_DIR)), $(wildcard $(GROUP_DIR)/$(directory)/*.txt))
+ @echo "===> GENERATE GROUP HEADERS"
+ $(Q)$(GEN_GROUPS) ./groups $(BUILD_DIR) ./perl/templates
+ $(Q)touch $(GENGROUPLOCK)
+
+$(FORTRAN_INTERFACE): $(SRC_DIR)/likwid.f90
+ @echo "===> COMPILE FORTRAN INTERFACE $@"
+ $(Q)$(FC) -c $(FCFLAGS) $<
+ @rm -f likwid.o
+
+$(EXT_TARGETS):
+ @echo "===> ENTER $@"
+ $(Q)$(MAKE) --no-print-directory -C $@ $(MAKECMDGOALS)
+
+#PATTERN RULES
+$(BUILD_DIR)/%.o: %.c
+ @echo "===> COMPILE $@"
+ $(Q)$(CC) -c $(CFLAGS) $(ANSI_CFLAGS) $(CPPFLAGS) $< -o $@
+ $(Q)$(CC) $(CPPFLAGS) -MT $(@:.d=.o) -MM $< > $(BUILD_DIR)/$*.d
+
+$(BUILD_DIR)/%.o: %.cc
+ @echo "===> COMPILE $@"
+ $(Q)$(CXX) -c $(CXXFLAGS) $(CPPFLAGS) $< -o $@
+ $(Q)$(CXX) $(CXXFLAGS) $(CPPFLAGS) -MT $(@:.d=.o) -MM $< > $(BUILD_DIR)/$*.d
+
+
+$(BUILD_DIR)/%.pas: $(BENCH_DIR)/%.ptt
+ @echo "===> GENERATE BENCHMARKS"
+ $(Q)$(GEN_PAS) $(BENCH_DIR) $(BUILD_DIR) ./perl/templates
+
+$(BUILD_DIR)/%.h: $(SRC_DIR)/includes/%.txt
+ @echo "===> GENERATE HEADER $@"
+ $(Q)$(GEN_PMHEADER) $< $@
+
+$(BUILD_DIR)/%.o: $(BUILD_DIR)/%.pas
+ @echo "===> ASSEMBLE $@"
+ $(Q)$(PAS) -i $(PASFLAGS) -o $(BUILD_DIR)/$*.s $< '$(DEFINES)'
+ $(Q)$(AS) $(ASFLAGS) $(BUILD_DIR)/$*.s -o $@
+
+ifeq ($(findstring $(MAKECMDGOALS),clean),)
+-include $(OBJ:.o=.d)
+endif
+
+.PHONY: clean distclean install uninstall $(EXT_TARGETS)
+
+
+.PRECIOUS: $(BUILD_DIR)/%.pas
+
+.NOTPARALLEL:
+
+
+clean: $(EXT_TARGETS)
+ @echo "===> CLEAN"
+ @rm -rf $(BUILD_DIR)
+ @rm -f $(GENGROUPLOCK)
+
+distclean: clean
+ @echo "===> DIST CLEAN"
+ @rm -f likwid-*
+ @rm -f $(STATIC_TARGET_LIB)
+ @rm -f $(DYNAMIC_TARGET_LIB)
+ @rm -f $(FORTRAN_INTERFACE)
+ @rm -f $(PINLIB)
+ @rm -f tags
+
+install:
+ @echo "===> INSTALL applications to $(PREFIX)/bin"
+ @mkdir -p $(PREFIX)/bin
+ @cp -f likwid-* $(PREFIX)/bin
+ @cp -f perl/feedGnuplot $(PREFIX)/bin
+ @cp -f perl/likwid-* $(PREFIX)/bin
+ @chmod 755 $(PREFIX)/bin/likwid-*
+ @echo "===> INSTALL man pages to $(MANPREFIX)/man1"
+ @mkdir -p $(MANPREFIX)/man1
+	@sed -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" < $(DOC_DIR)/likwid-topology.1 > $(MANPREFIX)/man1/likwid-topology.1
+	@sed -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" < $(DOC_DIR)/likwid-features.1 > $(MANPREFIX)/man1/likwid-features.1
+	@sed -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" < $(DOC_DIR)/likwid-perfctr.1 > $(MANPREFIX)/man1/likwid-perfctr.1
+	@sed -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" < $(DOC_DIR)/likwid-powermeter.1 > $(MANPREFIX)/man1/likwid-powermeter.1
+	@sed -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" < $(DOC_DIR)/likwid-pin.1 > $(MANPREFIX)/man1/likwid-pin.1
+ @chmod 644 $(MANPREFIX)/man1/likwid-*
+ @echo "===> INSTALL headers to $(PREFIX)/include"
+ @mkdir -p $(PREFIX)/include
+ @cp -f src/includes/likwid*.h $(PREFIX)/include/
+ $(FORTRAN_INSTALL)
+ @echo "===> INSTALL libraries to $(PREFIX)/lib"
+ @mkdir -p $(PREFIX)/lib
+ @cp -f liblikwid* $(PREFIX)/lib
+ @chmod 755 $(PREFIX)/lib/$(PINLIB)
+ @echo "===> INSTALL filters to $(LIKWIDFILTERPATH)"
+ @mkdir -p $(LIKWIDFILTERPATH)
+ @cp -f filters/* $(LIKWIDFILTERPATH)
+ @chmod 755 $(LIKWIDFILTERPATH)/*
+
+uninstall:
+ @echo "===> REMOVING applications from $(PREFIX)/bin"
+ @rm -f $(addprefix $(PREFIX)/bin/,$(APPS))
+ @rm -f $(PREFIX)/bin/likwid-mpirun
+ @rm -f $(PREFIX)/bin/likwid-perfscope
+ @rm -f $(PREFIX)/bin/feedGnuplot
+ @echo "===> REMOVING man pages from $(MANPREFIX)/man1"
+ @rm -f $(addprefix $(MANPREFIX)/man1/,$(addsuffix .1,$(APPS)))
+ @echo "===> REMOVING libs from $(PREFIX)/lib"
+ @rm -f $(PREFIX)/lib/liblikwid*
+ @echo "===> REMOVING filter from $(PREFIX)/share"
+ @rm -rf $(PREFIX)/share/likwid
+
+
+
diff --git a/README b/README
new file mode 100644
index 000000000..fa1e85cb6
--- /dev/null
+++ b/README
@@ -0,0 +1,27 @@
+Likwid is a simple to install and use toolsuite of command line applications
+for performance oriented programmers. It works for Intel and AMD processors
+on the Linux operating system.
+
+It consists of:
+
+likwid-topology - print thread and cache topology
+likwid-features - view and toggle feature register on Intel processors
+likwid-perfctr - configure and read out hardware performance counters on Intel and AMD processors
+likwid-powermeter - read out RAPL Energy information and get info about Turbo Mode steps
+likwid-pin - pin your threaded application (pthread, Intel and gcc OpenMP) to dedicated processors
+likwid-bench - Micro benchmarking platform
+likwid-gencfg - Dumps topology information to a file
+likwid-mpirun - Wrapper to start MPI and Hybrid MPI/OpenMP applications (Supports Intel MPI and OpenMPI)
+likwid-scope - Frontend to the timeline mode of likwid-perfctr, plots live graphs of performance metrics
+
+For a detailed documentation on the usage of the tools have a look at the
+likwid wiki pages at:
+
+http://code.google.com/p/likwid/wiki/Introduction
+
+If you have problems or suggestions please let me know on the likwid mailing list:
+
+http://groups.google.com/group/likwid-users
+
+
+
diff --git a/bench/phi/copy.ptt b/bench/phi/copy.ptt
new file mode 100644
index 000000000..81622bf04
--- /dev/null
+++ b/bench/phi/copy.ptt
@@ -0,0 +1,13 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 0
+BYTES 16
+LOOP 32
+vmovaps zmm0, [STR0 + GPR1 * 8]
+vmovaps zmm1, [STR0 + GPR1 * 8 + 64]
+vmovaps zmm2, [STR0 + GPR1 * 8 + 128]
+vmovaps zmm3, [STR0 + GPR1 * 8 + 192]
+vmovaps [STR1 + GPR1 * 8] , zmm0
+vmovaps [STR1 + GPR1 * 8 + 64], zmm1
+vmovaps [STR1 + GPR1 * 8 + 128], zmm2
+vmovaps [STR1 + GPR1 * 8 + 192], zmm3
diff --git a/bench/phi/copy_mem.ptt b/bench/phi/copy_mem.ptt
new file mode 100644
index 000000000..3891a38cd
--- /dev/null
+++ b/bench/phi/copy_mem.ptt
@@ -0,0 +1,19 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 0
+BYTES 16
+LOOP 32
+vprefetch0 [STR0 + GPR1 * 8 + 1024]
+vmovaps zmm0, [STR0 + GPR1 * 8]
+vmovaps zmm1, [STR0 + GPR1 * 8 + 64]
+vmovaps zmm2, [STR0 + GPR1 * 8 + 128]
+vmovaps zmm3, [STR0 + GPR1 * 8 + 192]
+vprefetch0 [STR1 + GPR1 * 8 + 1024]
+vmovnrngoaps [STR1 + GPR1 * 8], zmm0
+clevict1 [STR1 + GPR1 * 8]
+vmovnrngoaps [STR1 + GPR1 * 8 + 64], zmm1
+clevict1 [STR1 + GPR1 * 8 + 64]
+vmovnrngoaps [STR1 + GPR1 * 8 + 128], zmm2
+clevict1 [STR1 + GPR1 * 8 + 128]
+vmovnrngoaps [STR1 + GPR1 * 8 + 192], zmm3
+clevict1 [STR1 + GPR1 * 8 + 192]
diff --git a/bench/phi/copy_p0.ptt b/bench/phi/copy_p0.ptt
new file mode 100644
index 000000000..49527a245
--- /dev/null
+++ b/bench/phi/copy_p0.ptt
@@ -0,0 +1,17 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 0
+BYTES 16
+LOOP 32
+vprefetch1 [STR0 + GPR1 * 8 + 2048]
+vprefetch0 [STR0 + GPR1 * 8 + 256]
+vmovaps zmm0, [STR0 + GPR1 * 8]
+vmovaps zmm1, [STR0 + GPR1 * 8 + 64]
+vmovaps zmm2, [STR0 + GPR1 * 8 + 128]
+vmovaps zmm3, [STR0 + GPR1 * 8 + 192]
+vprefetche1 [STR1 + GPR1 * 8 + 2048]
+vprefetche0 [STR1 + GPR1 * 8 + 256]
+vmovaps [STR1 + GPR1 * 8] , zmm0
+vmovaps [STR1 + GPR1 * 8 + 64], zmm1
+vmovaps [STR1 + GPR1 * 8 + 128], zmm2
+vmovaps [STR1 + GPR1 * 8 + 192], zmm3
diff --git a/bench/phi/copy_p1.ptt b/bench/phi/copy_p1.ptt
new file mode 100644
index 000000000..c129b4db1
--- /dev/null
+++ b/bench/phi/copy_p1.ptt
@@ -0,0 +1,38 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 0
+BYTES 16
+vprefetch0 [STR0 + GPR1 * 8]
+vprefetch0 [STR0 + GPR1 * 8 + 256]
+vprefetch0 [STR0 + GPR1 * 8 + 512]
+vprefetch0 [STR0 + GPR1 * 8 + 768]
+vprefetche0 [STR1 + GPR1 * 8 ]
+vprefetche0 [STR1 + GPR1 * 8 + 256]
+LOOP 32
+vmovaps zmm0, [STR0 + GPR1 * 8]
+vprefetch1 [STR0 + GPR1 * 8 + 2048]
+vmovaps zmm1, [STR0 + GPR1 * 8 + 64]
+vprefetch0 [STR0 + GPR1 * 8 + 1024]
+vmovaps zmm2, [STR0 + GPR1 * 8 + 128]
+vprefetche1 [STR1 + GPR1 * 8 + 2048]
+vmovaps zmm3, [STR0 + GPR1 * 8 + 192]
+vprefetche0 [STR1 + GPR1 * 8 + 1024]
+vmovaps [STR1 + GPR1 * 8] , zmm0
+vprefetch1 [STR0 + GPR1 * 8 + 2112]
+vmovaps [STR1 + GPR1 * 8 + 64], zmm1
+vprefetch0 [STR0 + GPR1 * 8 + 1088]
+vmovaps [STR1 + GPR1 * 8 + 128], zmm2
+vprefetche1 [STR1 + GPR1 * 8 + 2112]
+vmovaps [STR1 + GPR1 * 8 + 192], zmm3
+vprefetche0 [STR1 + GPR1 * 8 + 1088]
+vprefetch1 [STR0 + GPR1 * 8 + 2176]
+vprefetch0 [STR0 + GPR1 * 8 + 1152]
+vprefetche1 [STR1 + GPR1 * 8 + 2176]
+vprefetche0 [STR1 + GPR1 * 8 + 1152]
+vprefetch1 [STR0 + GPR1 * 8 + 2240]
+vprefetch0 [STR0 + GPR1 * 8 + 1216]
+vprefetche1 [STR1 + GPR1 * 8 + 2240]
+vprefetche0 [STR1 + GPR1 * 8 + 1216]
+
+
+
diff --git a/bench/phi/load.ptt b/bench/phi/load.ptt
new file mode 100644
index 000000000..e8367c277
--- /dev/null
+++ b/bench/phi/load.ptt
@@ -0,0 +1,10 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 8
+LOOP 32
+vprefetch0 [STR0 + GPR1 * 8 + 1024]
+vmovaps zmm0, [STR0 + GPR1 * 8]
+vmovaps zmm1, [STR0 + GPR1 * 8 + 64]
+vmovaps zmm2, [STR0 + GPR1 * 8 + 128]
+vmovaps zmm3, [STR0 + GPR1 * 8 + 192]
diff --git a/bench/phi/store.ptt b/bench/phi/store.ptt
new file mode 100644
index 000000000..533501c07
--- /dev/null
+++ b/bench/phi/store.ptt
@@ -0,0 +1,14 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 8
+vmovaps zmm0, [SCALAR]
+vmovaps zmm1, [SCALAR]
+vmovaps zmm2, [SCALAR]
+vmovaps zmm3, [SCALAR]
+LOOP 32
+vprefetch0 [STR0 + GPR1 * 8 + 1024]
+vmovaps [STR0 + GPR1 * 8] , zmm0
+vmovaps [STR0 + GPR1 * 8 + 64], zmm1
+vmovaps [STR0 + GPR1 * 8 + 128], zmm2
+vmovaps [STR0 + GPR1 * 8 + 192], zmm3
diff --git a/bench/phi/store_mem.ptt b/bench/phi/store_mem.ptt
new file mode 100644
index 000000000..fa8d2625f
--- /dev/null
+++ b/bench/phi/store_mem.ptt
@@ -0,0 +1,18 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 8
+vmovaps zmm0, [SCALAR]
+vmovaps zmm1, [SCALAR]
+vmovaps zmm2, [SCALAR]
+vmovaps zmm3, [SCALAR]
+LOOP 32
+vprefetch0 [STR0 + GPR1 * 8 + 1024]
+vmovnrngoaps [STR0 + GPR1 * 8], zmm0
+clevict1 [STR0 + GPR1 * 8]
+vmovnrngoaps [STR0 + GPR1 * 8 + 64], zmm1
+clevict1 [STR0 + GPR1 * 8 + 64]
+vmovnrngoaps [STR0 + GPR1 * 8 + 128], zmm2
+clevict1 [STR0 + GPR1 * 8 + 128]
+vmovnrngoaps [STR0 + GPR1 * 8 + 192], zmm3
+clevict1 [STR0 + GPR1 * 8 + 192]
diff --git a/bench/phi/sum.ptt b/bench/phi/sum.ptt
new file mode 100644
index 000000000..e5d4c57be
--- /dev/null
+++ b/bench/phi/sum.ptt
@@ -0,0 +1,10 @@
+STREAMS 1
+TYPE SINGLE
+FLOPS 1
+BYTES 4
+LOOP 64
+vprefetch0 [STR0 + GPR1 * 8 + 1024]
+vaddps zmm0, zmm0, [STR0 + GPR1 * 8]
+vaddps zmm1, zmm1, [STR0 + GPR1 * 8 + 64]
+vaddps zmm2, zmm2, [STR0 + GPR1 * 8 + 128]
+vaddps zmm3, zmm3, [STR0 + GPR1 * 8 + 192]
diff --git a/bench/phi/triad.ptt b/bench/phi/triad.ptt
new file mode 100644
index 000000000..f38fe30ba
--- /dev/null
+++ b/bench/phi/triad.ptt
@@ -0,0 +1,21 @@
+STREAMS 4
+TYPE DOUBLE
+FLOPS 2
+BYTES 32
+LOOP 32
+vmovaps zmm0, [STR1 + GPR1*8]
+vmovaps zmm1, [STR1 + GPR1*8+64]
+vmovaps zmm2, [STR1 + GPR1*8+128]
+vmovaps zmm3, [STR1 + GPR1*8+192]
+vmovaps zmm4, [STR2 + GPR1*8]
+vmovaps zmm5, [STR2 + GPR1*8+64]
+vmovaps zmm6, [STR2 + GPR1*8+128]
+vmovaps zmm7, [STR2 + GPR1*8+192]
+vfmadd132pd zmm0, zmm4, [STR3 + GPR1*8]
+vfmadd132pd zmm1, zmm5, [STR3 + GPR1*8+64]
+vfmadd132pd zmm2, zmm6, [STR3 + GPR1*8+128]
+vfmadd132pd zmm3, zmm7, [STR3 + GPR1*8+192]
+vmovaps [STR0 + GPR1*8], zmm0
+vmovaps [STR0 + GPR1*8+64], zmm1
+vmovaps [STR0 + GPR1*8+128], zmm2
+vmovaps [STR0 + GPR1*8+192], zmm3
diff --git a/bench/phi/triad_mem.ptt b/bench/phi/triad_mem.ptt
new file mode 100644
index 000000000..a9babee76
--- /dev/null
+++ b/bench/phi/triad_mem.ptt
@@ -0,0 +1,29 @@
+STREAMS 4
+TYPE DOUBLE
+FLOPS 2
+BYTES 32
+LOOP 32
+vprefetch0 [STR1 + GPR1 * 8 + 1024]
+vprefetch0 [STR2 + GPR1 * 8 + 1024]
+vprefetch0 [STR3 + GPR1 * 8 + 1024]
+vmovaps zmm0, [STR1 + GPR1*8]
+vmovaps zmm1, [STR1 + GPR1*8+64]
+vmovaps zmm2, [STR1 + GPR1*8+128]
+vmovaps zmm3, [STR1 + GPR1*8+192]
+vmovaps zmm4, [STR2 + GPR1*8]
+vmovaps zmm5, [STR2 + GPR1*8+64]
+vmovaps zmm6, [STR2 + GPR1*8+128]
+vmovaps zmm7, [STR2 + GPR1*8+192]
+vfmadd132pd zmm0, zmm4, [STR3 + GPR1*8]
+vfmadd132pd zmm1, zmm5, [STR3 + GPR1*8+64]
+vfmadd132pd zmm2, zmm6, [STR3 + GPR1*8+128]
+vfmadd132pd zmm3, zmm7, [STR3 + GPR1*8+192]
+vprefetch0 [STR0 + GPR1 * 8 + 1024]
+vmovnrngoaps [STR0 + GPR1 * 8], zmm0
+clevict1 [STR0 + GPR1 * 8]
+vmovnrngoaps [STR0 + GPR1 * 8 + 64], zmm1
+clevict1 [STR0 + GPR1 * 8 + 64]
+vmovnrngoaps [STR0 + GPR1 * 8 + 128], zmm2
+clevict1 [STR0 + GPR1 * 8 + 128]
+vmovnrngoaps [STR0 + GPR1 * 8 + 192], zmm3
+clevict1 [STR0 + GPR1 * 8 + 192]
diff --git a/bench/phi/update.ptt b/bench/phi/update.ptt
new file mode 100644
index 000000000..a4d4e34e7
--- /dev/null
+++ b/bench/phi/update.ptt
@@ -0,0 +1,14 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 16
+LOOP 32
+vprefetch0 [STR0 + GPR1 * 8 + 1024]
+vmovaps zmm0, [STR0 + GPR1 * 8]
+vmovaps zmm1, [STR0 + GPR1 * 8 + 64]
+vmovaps zmm2, [STR0 + GPR1 * 8 + 128]
+vmovaps zmm3, [STR0 + GPR1 * 8 + 192]
+vmovaps [STR0 + GPR1 * 8] , zmm0
+vmovaps [STR0 + GPR1 * 8 + 64], zmm1
+vmovaps [STR0 + GPR1 * 8 + 128], zmm2
+vmovaps [STR0 + GPR1 * 8 + 192], zmm3
diff --git a/bench/x86-64/clcopy.ptt b/bench/x86-64/clcopy.ptt
new file mode 100644
index 000000000..b59c2bed3
--- /dev/null
+++ b/bench/x86-64/clcopy.ptt
@@ -0,0 +1,15 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 0
+BYTES 16
+LOOP 32
+movaps FPR1, [STR0 + GPR1 * 8 ]
+movaps FPR2, [STR0 + GPR1 * 8 + 64 ]
+movaps FPR3, [STR0 + GPR1 * 8 + 128 ]
+movaps FPR4, [STR0 + GPR1 * 8 + 192 ]
+movaps [STR1 + GPR1 * 8 ], FPR1
+movaps [STR1 + GPR1 * 8 + 64 ], FPR2
+movaps [STR1 + GPR1 * 8 + 128 ], FPR3
+movaps [STR1 + GPR1 * 8 + 192 ], FPR4
+
+
diff --git a/bench/x86-64/clload.ptt b/bench/x86-64/clload.ptt
new file mode 100644
index 000000000..8c3ddc2b5
--- /dev/null
+++ b/bench/x86-64/clload.ptt
@@ -0,0 +1,11 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 8
+LOOP 32
+movaps FPR1, [STR0 + GPR1 * 8]
+movaps FPR2, [STR0 + GPR1 * 8 + 64]
+movaps FPR3, [STR0 + GPR1 * 8 + 128]
+movaps FPR4, [STR0 + GPR1 * 8 + 192]
+
+
diff --git a/bench/x86-64/clstore.ptt b/bench/x86-64/clstore.ptt
new file mode 100644
index 000000000..5541b8ec8
--- /dev/null
+++ b/bench/x86-64/clstore.ptt
@@ -0,0 +1,14 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 8
+movaps FPR1, [SCALAR]
+movaps FPR2, [SCALAR]
+movaps FPR3, [SCALAR]
+movaps FPR4, [SCALAR]
+LOOP 32
+movaps [STR0 + GPR1 * 8], FPR1
+movaps [STR0 + GPR1 * 8 + 64], FPR2
+movaps [STR0 + GPR1 * 8 + 128], FPR3
+movaps [STR0 + GPR1 * 8 + 192], FPR4
+
diff --git a/bench/x86-64/copy.ptt b/bench/x86-64/copy.ptt
new file mode 100644
index 000000000..ffca4f5dc
--- /dev/null
+++ b/bench/x86-64/copy.ptt
@@ -0,0 +1,15 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 0
+BYTES 16
+LOOP 8
+movaps FPR1, [STR0 + GPR1 * 8]
+movaps FPR2, [STR0 + GPR1 * 8 + 16]
+movaps FPR3, [STR0 + GPR1 * 8 + 32]
+movaps FPR4, [STR0 + GPR1 * 8 + 48]
+movaps [STR1 + GPR1 * 8] , FPR1
+movaps [STR1 + GPR1 * 8 + 16], FPR2
+movaps [STR1 + GPR1 * 8 + 32], FPR3
+movaps [STR1 + GPR1 * 8 + 48], FPR4
+
+
diff --git a/bench/x86-64/copy_mem.ptt b/bench/x86-64/copy_mem.ptt
new file mode 100644
index 000000000..fab5a667a
--- /dev/null
+++ b/bench/x86-64/copy_mem.ptt
@@ -0,0 +1,15 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 0
+BYTES 16
+LOOP 8
+movaps FPR1, [STR0 + GPR1 * 8]
+movaps FPR2, [STR0 + GPR1 * 8 + 16]
+movaps FPR3, [STR0 + GPR1 * 8 + 32]
+movaps FPR4, [STR0 + GPR1 * 8 + 48]
+movntpd [STR1 + GPR1 * 8] , FPR1
+movntpd [STR1 + GPR1 * 8 + 16], FPR2
+movntpd [STR1 + GPR1 * 8 + 32], FPR3
+movntpd [STR1 + GPR1 * 8 + 48], FPR4
+
+
diff --git a/bench/x86-64/load.ptt b/bench/x86-64/load.ptt
new file mode 100644
index 000000000..36aaab1c9
--- /dev/null
+++ b/bench/x86-64/load.ptt
@@ -0,0 +1,12 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 8
+LOOP 8
+mov GPR12, [STR0 + GPR1 * 8 + 256]
+movaps FPR1, [STR0 + GPR1 * 8]
+movaps FPR2, [STR0 + GPR1 * 8 + 16]
+movaps FPR3, [STR0 + GPR1 * 8 + 32]
+movaps FPR4, [STR0 + GPR1 * 8 + 48]
+
+
diff --git a/bench/x86-64/peak.ptt b/bench/x86-64/peak.ptt
new file mode 100644
index 000000000..c03e2c8d7
--- /dev/null
+++ b/bench/x86-64/peak.ptt
@@ -0,0 +1,49 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 2
+BYTES 16
+INC 8
+movaps FPR1, [SCALAR]
+sub GPR2, 4
+sub STR0, 32
+sub STR1, 32
+mov GPR1, GPR2
+neg GPR1
+.align 16
+1:
+movaps FPR2, [STR0 + GPR1 * 8 ]
+addpd FPR2, FPR1
+mulpd FPR2, FPR1
+movaps FPR6, [STR0 + GPR1 * 8 ]
+addpd FPR2, FPR1
+mulpd FPR2, FPR1
+pshufd FPR2, FPR1, 0x1
+#movaps [STR1 + GPR1 * 8], FPR2
+movaps FPR3, [STR0 + GPR1 * 8 + 16]
+addpd FPR3, FPR1
+mulpd FPR3, FPR1
+movaps FPR7, [STR0 + GPR1 * 8 + 16 ]
+addpd FPR3, FPR1
+mulpd FPR3, FPR1
+pshufd FPR3, FPR1, 0x1
+#movaps [STR1 + GPR1 * 8 + 16], FPR3
+movaps FPR4, [STR0 + GPR1 * 8 + 32]
+addpd FPR4, FPR1
+mulpd FPR4, FPR1
+movaps FPR8, [STR0 + GPR1 * 8 + 32 ]
+addpd FPR4, FPR1
+mulpd FPR4, FPR1
+pshufd FPR4, FPR1, 0x1
+#movaps [STR1 + GPR1 * 8 + 32], FPR4
+movaps FPR5, [STR0 + GPR1 * 8 + 48]
+addpd FPR5, FPR1
+mulpd FPR5, FPR1
+movaps FPR9, [STR0 + GPR1 * 8 + 48 ]
+addpd FPR5, FPR1
+mulpd FPR5, FPR1
+pshufd FPR5, FPR1, 0x1
+#movaps [STR1 + GPR1 * 8 + 48], FPR5
+add GPR1, 8
+js 1b
+
+
diff --git a/bench/x86-64/peakflops.ptt b/bench/x86-64/peakflops.ptt
new file mode 100644
index 000000000..94c769afe
--- /dev/null
+++ b/bench/x86-64/peakflops.ptt
@@ -0,0 +1,37 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 2
+BYTES 16
+INC 8
+movaps FPR1, [SCALAR]
+sub GPR2, 4
+sub STR0, 32
+sub STR1, 32
+mov GPR1, GPR2
+neg GPR1
+.align 32
+1:
+movaps FPR2, [STR0 + GPR1 * 8 ]
+addpd FPR2, FPR1
+mulpd FPR2, FPR1
+addpd FPR2, FPR1
+mulpd FPR2, FPR1
+movaps FPR3, [STR0 + GPR1 * 8 + 16]
+add GPR1, 8
+addpd FPR3, FPR1
+mulpd FPR3, FPR1
+addpd FPR3, FPR1
+mulpd FPR3, FPR1
+movaps FPR4, [STR0 + GPR1 * 8 - 32]
+addpd FPR4, FPR1
+mulpd FPR4, FPR1
+addpd FPR4, FPR1
+mulpd FPR4, FPR1
+movaps FPR5, [STR0 + GPR1 * 8 - 16]
+addpd FPR5, FPR1
+mulpd FPR5, FPR1
+addpd FPR5, FPR1
+mulpd FPR5, FPR1
+js 1b
+
+
diff --git a/bench/x86-64/store.ptt b/bench/x86-64/store.ptt
new file mode 100644
index 000000000..4ef9ab987
--- /dev/null
+++ b/bench/x86-64/store.ptt
@@ -0,0 +1,15 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 8
+movaps FPR1, [SCALAR]
+movaps FPR2, [SCALAR]
+movaps FPR3, [SCALAR]
+movaps FPR4, [SCALAR]
+LOOP 8
+#mov GPR14, [STR0 + GPR1 * 8 + 256]
+movaps [STR0 + GPR1 * 8] , FPR1
+movaps [STR0 + GPR1 * 8 + 16], FPR2
+movaps [STR0 + GPR1 * 8 + 32], FPR3
+movaps [STR0 + GPR1 * 8 + 48], FPR4
+
diff --git a/bench/x86-64/store_mem.ptt b/bench/x86-64/store_mem.ptt
new file mode 100644
index 000000000..0a0222d6a
--- /dev/null
+++ b/bench/x86-64/store_mem.ptt
@@ -0,0 +1,14 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 8
+movaps FPR1, [SCALAR]
+movaps FPR2, [SCALAR]
+movaps FPR3, [SCALAR]
+movaps FPR4, [SCALAR]
+LOOP 8
+movntpd [STR0 + GPR1 * 8] , FPR1
+movntpd [STR0 + GPR1 * 8 + 16], FPR2
+movntpd [STR0 + GPR1 * 8 + 32], FPR3
+movntpd [STR0 + GPR1 * 8 + 48], FPR4
+
diff --git a/bench/x86-64/stream.ptt b/bench/x86-64/stream.ptt
new file mode 100644
index 000000000..7c84c3c2d
--- /dev/null
+++ b/bench/x86-64/stream.ptt
@@ -0,0 +1,23 @@
+STREAMS 3
+TYPE DOUBLE
+FLOPS 2
+BYTES 24
+movaps FPR5, [SCALAR]
+LOOP 8
+movaps FPR1, [STR1 + GPR1*8]
+movaps FPR2, [STR1 + GPR1*8+16]
+movaps FPR3, [STR1 + GPR1*8+32]
+movaps FPR4, [STR1 + GPR1*8+48]
+mulpd FPR1, FPR5
+addpd FPR1, [STR2 + GPR1*8]
+mulpd FPR2, FPR5
+addpd FPR2, [STR2 + GPR1*8+16]
+mulpd FPR3, FPR5
+addpd FPR3, [STR2 + GPR1*8+32]
+mulpd FPR4, FPR5
+addpd FPR4, [STR2 + GPR1*8+48]
+movaps [STR0 + GPR1*8] , FPR1
+movaps [STR0 + GPR1*8+16], FPR2
+movaps [STR0 + GPR1*8+32], FPR3
+movaps [STR0 + GPR1*8+48], FPR4
+
diff --git a/bench/x86-64/stream_mem.ptt b/bench/x86-64/stream_mem.ptt
new file mode 100644
index 000000000..b8364cc0b
--- /dev/null
+++ b/bench/x86-64/stream_mem.ptt
@@ -0,0 +1,11 @@
+STREAMS 3
+TYPE DOUBLE
+FLOPS 2
+BYTES 24
+movaps FPR5, [SCALAR]
+LOOP 2
+movaps FPR1, [STR2 + GPR1*8]
+mulpd FPR1, FPR5
+addpd FPR1, [STR1 + GPR1*8]
+movntpd [STR0 + GPR1*8], FPR1
+
diff --git a/bench/x86-64/sum.ptt b/bench/x86-64/sum.ptt
new file mode 100644
index 000000000..337484340
--- /dev/null
+++ b/bench/x86-64/sum.ptt
@@ -0,0 +1,23 @@
+STREAMS 1
+TYPE SINGLE
+FLOPS 1
+BYTES 4
+xorps FPR1, FPR1
+movaps FPR2, FPR1
+movaps FPR3, FPR1
+movaps FPR4, FPR1
+movaps FPR5, FPR1
+movaps FPR6, FPR1
+movaps FPR7, FPR1
+movaps FPR8, FPR1
+LOOP 32
+addps FPR1, [STR0 + GPR1 * 4]
+addps FPR2, [STR0 + GPR1 * 4 + 16]
+addps FPR3, [STR0 + GPR1 * 4 + 32]
+addps FPR4, [STR0 + GPR1 * 4 + 48]
+addps FPR5, [STR0 + GPR1 * 4 + 64]
+addps FPR6, [STR0 + GPR1 * 4 + 80]
+addps FPR7, [STR0 + GPR1 * 4 + 96]
+addps FPR8, [STR0 + GPR1 * 4 + 112]
+
+
diff --git a/bench/x86-64/sum_avx.ptt b/bench/x86-64/sum_avx.ptt
new file mode 100644
index 000000000..e2e8e40f2
--- /dev/null
+++ b/bench/x86-64/sum_avx.ptt
@@ -0,0 +1,14 @@
+STREAMS 1
+TYPE SINGLE
+FLOPS 1
+BYTES 4
+vxorps ymm1, ymm1, ymm1
+vmovaps ymm2, ymm1
+vmovaps ymm3, ymm1
+vmovaps ymm4, ymm1
+LOOP 32
+vaddps ymm1, ymm1, [STR0 + GPR1*4]
+vaddps ymm2, ymm2, [STR0 + GPR1*4+32]
+vaddps ymm3, ymm3, [STR0 + GPR1*4+64]
+vaddps ymm4, ymm4, [STR0 + GPR1*4+96]
+
diff --git a/bench/x86-64/sum_plain.ptt b/bench/x86-64/sum_plain.ptt
new file mode 100644
index 000000000..23fe2376c
--- /dev/null
+++ b/bench/x86-64/sum_plain.ptt
@@ -0,0 +1,15 @@
+STREAMS 1
+TYPE SINGLE
+FLOPS 1
+BYTES 4
+xorps FPR1, FPR1
+xorps FPR2, FPR2
+xorps FPR3, FPR3
+xorps FPR4, FPR4
+LOOP 4
+addss FPR1, [STR0 + GPR1 * 4]
+addss FPR2, [STR0 + GPR1 * 4 + 4]
+addss FPR3, [STR0 + GPR1 * 4 + 8]
+addss FPR4, [STR0 + GPR1 * 4 + 12]
+
+
diff --git a/bench/x86-64/triad.ptt b/bench/x86-64/triad.ptt
new file mode 100644
index 000000000..d521aa093
--- /dev/null
+++ b/bench/x86-64/triad.ptt
@@ -0,0 +1,22 @@
+STREAMS 4
+TYPE DOUBLE
+FLOPS 2
+BYTES 32
+LOOP 8
+movaps FPR1, [STR1 + GPR1*8]
+movaps FPR2, [STR1 + GPR1*8+16]
+movaps FPR3, [STR1 + GPR1*8+32]
+movaps FPR4, [STR1 + GPR1*8+48]
+mulpd FPR1, [STR2 + GPR1*8]
+addpd FPR1, [STR3 + GPR1*8]
+mulpd FPR2, [STR2 + GPR1*8+16]
+addpd FPR2, [STR3 + GPR1*8+16]
+mulpd FPR3, [STR2 + GPR1*8+32]
+addpd FPR3, [STR3 + GPR1*8+32]
+mulpd FPR4, [STR2 + GPR1*8+48]
+addpd FPR4, [STR3 + GPR1*8+48]
+movaps [STR0 + GPR1*8], FPR1
+movaps [STR0 + GPR1*8+16], FPR2
+movaps [STR0 + GPR1*8+32], FPR3
+movaps [STR0 + GPR1*8+48], FPR4
+
diff --git a/bench/x86-64/triad_mem.ptt b/bench/x86-64/triad_mem.ptt
new file mode 100644
index 000000000..7c24748dd
--- /dev/null
+++ b/bench/x86-64/triad_mem.ptt
@@ -0,0 +1,10 @@
+STREAMS 4
+TYPE DOUBLE
+FLOPS 2
+BYTES 32
+LOOP 2
+movaps FPR1, [STR1 + GPR1*8]
+mulpd FPR1, [STR2 + GPR1*8]
+addpd FPR1, [STR3 + GPR1*8]
+movntpd [STR0 + GPR1*8], FPR1
+
diff --git a/bench/x86-64/update.ptt b/bench/x86-64/update.ptt
new file mode 100644
index 000000000..ac1129b6b
--- /dev/null
+++ b/bench/x86-64/update.ptt
@@ -0,0 +1,15 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 16
+LOOP 8
+movaps FPR1, [STR0 + GPR1 * 8]
+movaps [STR0 + GPR1 * 8] , FPR1
+movaps FPR2, [STR0 + GPR1 * 8 + 16]
+movaps FPR3, [STR0 + GPR1 * 8 + 32]
+movaps FPR4, [STR0 + GPR1 * 8 + 48]
+movaps [STR0 + GPR1 * 8 + 16], FPR2
+movaps [STR0 + GPR1 * 8 + 32], FPR3
+movaps [STR0 + GPR1 * 8 + 48], FPR4
+
+
diff --git a/bench/x86/copy.ptt b/bench/x86/copy.ptt
new file mode 100644
index 000000000..111d38ba2
--- /dev/null
+++ b/bench/x86/copy.ptt
@@ -0,0 +1,18 @@
+STREAMS 2
+TYPE DOUBLE
+FLOPS 0
+BYTES 16
+mov GPR6, ARG1
+mov GPR2, STR0
+mov GPR3, STR1
+LOOP 8
+movaps FPR1, [GPR2 + GPR1 * 8]
+movaps FPR2, [GPR2 + GPR1 * 8 + 16]
+movaps FPR3, [GPR2 + GPR1 * 8 + 32]
+movaps FPR4, [GPR2 + GPR1 * 8 + 48]
+movaps [GPR3 + GPR1 * 8] , FPR1
+movaps [GPR3 + GPR1 * 8 + 16], FPR2
+movaps [GPR3 + GPR1 * 8 + 32], FPR3
+movaps [GPR3 + GPR1 * 8 + 48], FPR4
+
+
diff --git a/bench/x86/load.ptt b/bench/x86/load.ptt
new file mode 100644
index 000000000..cf001a46d
--- /dev/null
+++ b/bench/x86/load.ptt
@@ -0,0 +1,13 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 8
+mov GPR6, ARG1
+mov GPR2, STR0
+LOOP 8
+movaps FPR1, [GPR2 + GPR1 * 8]
+movaps FPR2, [GPR2 + GPR1 * 8 + 16]
+movaps FPR3, [GPR2 + GPR1 * 8 + 32]
+movaps FPR4, [GPR2 + GPR1 * 8 + 48]
+
+
diff --git a/bench/x86/store.ptt b/bench/x86/store.ptt
new file mode 100644
index 000000000..1cf15dac6
--- /dev/null
+++ b/bench/x86/store.ptt
@@ -0,0 +1,16 @@
+STREAMS 1
+TYPE DOUBLE
+FLOPS 0
+BYTES 8
+movaps FPR1, [SCALAR]
+movaps FPR2, [SCALAR]
+movaps FPR3, [SCALAR]
+movaps FPR4, [SCALAR]
+mov GPR6, ARG1
+mov GPR2, STR0
+LOOP 8
+movaps [GPR2 + GPR1 * 8] , FPR1
+movaps [GPR2 + GPR1 * 8 + 16], FPR2
+movaps [GPR2 + GPR1 * 8 + 32], FPR3
+movaps [GPR2 + GPR1 * 8 + 48], FPR4
+
diff --git a/bench/x86/stream.ptt b/bench/x86/stream.ptt
new file mode 100644
index 000000000..bab4ecb7c
--- /dev/null
+++ b/bench/x86/stream.ptt
@@ -0,0 +1,27 @@
+STREAMS 3
+TYPE DOUBLE
+FLOPS 2
+BYTES 24
+movaps FPR5, [SCALAR]
+mov GPR6, ARG1
+mov GPR2, STR0
+mov GPR3, STR1
+mov GPR4, STR2
+LOOP 8
+movaps FPR1, [GPR3 + GPR1*8]
+movaps FPR2, [GPR3 + GPR1*8+16]
+movaps FPR3, [GPR3 + GPR1*8+32]
+movaps FPR4, [GPR3 + GPR1*8+48]
+mulpd FPR1, FPR5
+addpd FPR1, [GPR4 + GPR1*8]
+mulpd FPR2, FPR5
+addpd FPR2, [GPR4 + GPR1*8+16]
+mulpd FPR3, FPR5
+addpd FPR3, [GPR4 + GPR1*8+32]
+mulpd FPR4, FPR5
+addpd FPR4, [GPR4 + GPR1*8+48]
+movaps [GPR2 + GPR1*8] , FPR1
+movaps [GPR2 + GPR1*8+16], FPR2
+movaps [GPR2 + GPR1*8+32], FPR3
+movaps [GPR2 + GPR1*8+48], FPR4
+
diff --git a/config.mk b/config.mk
new file mode 100644
index 000000000..697dac4e6
--- /dev/null
+++ b/config.mk
@@ -0,0 +1,57 @@
+# Please have a look in INSTALL and the WIKI for details on
+# configuration options and setup steps.
+# supported: GCC, MIC (ICC)
+COMPILER = GCC#NO SPACE
+
+# Define the color of the likwid-pin output
+# Can be NONE, BLACK, RED, GREEN, YELLOW, BLUE,
+# MAGENTA, CYAN or WHITE
+COLOR = BLUE#NO SPACE
+
+# Path where to install likwid
+PREFIX = /usr/local#NO SPACE
+MANPREFIX = $(PREFIX)/man#NO SPACE
+
+# For the daemon based secure msr/pci access configure
+# the absolute path to the msr daemon executable.
+# $(PREFIX)/bin/likwid-accessD
+ACCESSDAEMON = $(PREFIX)/bin/likwid-accessD#NO SPACE
+
+# Build the accessDaemon. Have a look in the WIKI for details.
+BUILDDAEMON = false#NO SPACE
+
+# Set the default mode for MSR access.
+# This can usually be overridden on the commandline.
+# Valid values are: direct, accessdaemon
+ACCESSMODE = direct#NO SPACE
+
+# Change to true to build a shared library instead of a static one
+SHARED_LIBRARY = true#NO SPACE
+
+# Build Fortran90 module interface for marker API. Adopt Fortran compiler
+# in ./make/include_<COMPILER>.mk if necessary. Default: ifort .
+FORTRAN_INTERFACE = false#NO SPACE
+
+# Instrument likwid-bench for use with likwid-perfctr
+INSTRUMENT_BENCH = true#NO SPACE
+
+# Instrument accesses to msr registers at likwid-perfctr
+INSTRUMENT_COUNTER = false#NO SPACE
+
+# Use Portable Hardware Locality (hwloc) instead of CPUID
+USE_HWLOC = true#NO SPACE
+
+# Usually you do not need to edit below
+MAX_NUM_THREADS = 263
+MAX_NUM_NODES = 4
+HASH_TABLE_SIZE = 20
+CFG_FILE_PATH = /etc/likwid.cfg
+
+# Versioning Information
+VERSION = 3
+RELEASE = 1
+DATE = 5.2.2014
+
+LIBLIKWIDPIN = $(abspath $(PREFIX)/lib/liblikwidpin.so)
+LIKWIDFILTERPATH = $(abspath $(PREFIX)/share/likwid)
+
diff --git a/doc/likwid-features.1 b/doc/likwid-features.1
new file mode 100644
index 000000000..4b7e2ced9
--- /dev/null
+++ b/doc/likwid-features.1
@@ -0,0 +1,58 @@
+.TH LIKWID-FEATURES 1 likwid\-
+.SH NAME
+likwid-features \- print and toggle the flags of the MSR_IA32_MISC_ENABLE model specific register
+.SH SYNOPSIS
+.B likwid-features
+.RB [ \-vh ]
+.RB [ \-t
+.IR coreId ]
+.RB [ \-su
+.IR prefetcher_tag ]
+.SH DESCRIPTION
+.B likwid-features
+is a command line application to print the flags in the model
+specific register (MSR) MSR_IA32_MISC_ENABLE on Intel x86 processors. On Core2 processors
+it can be used to toggle the hardware prefetch flags. It does not work on AMD processors.
+For a documentation what flags are supported on which processor refer to the Intel
+Software Developer's Manual Volume 3B, Table B.2. The MSR are set individually for every core.
+The following hardware prefetchers can be toggled:
+.IP \[bu]
+.B HW_PREFETCHER:
+Hardware prefetcher.
+.IP \[bu]
+.B CL_PREFETCHER:
+Adjacent cache line prefetcher.
+.IP \[bu]
+.B DCU_PREFETCHER:
+When the DCU prefetcher detects multiple loads from the same line done within a
+time limit, the DCU prefetcher assumes the next line will be required. The next
+line is prefetched in to the L1 data cache from memory or L2.
+.IP \[bu]
+.B IP_PREFETCHER:
+The IP prefetcher is an L1 data cache prefetcher. The IP prefetcher looks for
+sequential load history to determine whether to prefetch the next expected data
+into the L1 cache from memory or L2.
+
+.SH OPTIONS
+.TP
+.B \-\^v
+prints version information to standard output, then exits.
+.TP
+.B \-\^h
+prints a help message to standard output, then exits.
+.TP
+.B \-\^t " coreId"
+set on which processor core the MSR should be read
+.TP
+.B \-\^u " HW_PREFETCHER | CL_PREFETCHER | DCU_PREFETCHER | IP_PREFETCHER"
+specify which prefetcher to unset
+.TP
+.B \-\^s " HW_PREFETCHER | CL_PREFETCHER | DCU_PREFETCHER | IP_PREFETCHER"
+specify which prefetcher to set
+
+.SH AUTHOR
+Written by Jan Treibig <jan.treibig@gmail.com>.
+.SH BUGS
+Report Bugs on <http://code.google.com/p/likwid>.
+.SH "SEE ALSO"
+likwid-topology(1), likwid-perfCtr(1), likwid-pin(1),
diff --git a/doc/likwid-perfctr.1 b/doc/likwid-perfctr.1
new file mode 100644
index 000000000..f6d0c527e
--- /dev/null
+++ b/doc/likwid-perfctr.1
@@ -0,0 +1,196 @@
+.TH LIKWID-PERFCTR 1 likwid\-
+.SH NAME
+likwid-perfctr \- configure and read out hardware performance counters on x86 cpus
+.SH SYNOPSIS
+.B likwid-perfctr
+.RB [\-vhHVmaiCst]
+.RB [ \-c
+.IR core_list ]
+.RB [ \-g
+.IR performance_group
+or
+.IR performance_event_string ]
+.RB [ \-d
+.IR frequency
+.SH DESCRIPTION
+.B likwid-perfctr
+is a lightweight command line application to configure and read out hardware performance monitoring data
+on supported x86 processors. It can measure either as wrapper without changing the measured application
+or with marker API functions inside the code, which will turn on and off the counters. There are preconfigured
+groups with useful event sets and derived metrics. Additionally arbitrary events can be measured with
+custom event sets. The marker API can measure multiple named regions. Results are accumulated on multiple calls.
+The following x86 processors are supported:
+.IP \[bu]
+.B Intel Core 2:
+all variants. Counters:
+.I PMC0, PMC1, FIXC0, FIXC1, FIXC2
+.IP \[bu]
+.B Intel Nehalem:
+all variants. Counters:
+.I PMC0, PMC1, PMC2, PMC3, UPMC0 - UPMC7, FIXC0, FIXC1, FIXC2
+.IP \[bu]
+.B Intel Nehalem EX:
+all variants, no uncore for the moment. Counters:
+.I PMC0, PMC1, PMC2, PMC3, FIXC0, FIXC1, FIXC2
+.IP \[bu]
+.B Intel Westmere:
+all variants, Counters:
+.I PMC0, PMC1, PMC2, PMC3, UPMC0 - UPMC7, FIXC0, FIXC1, FIXC2
+.IP \[bu]
+.B Intel Sandy Bridge:
+all variants, no uncore at the moment, experimental support, Counters:
+.I PMC0, PMC1, PMC2, PMC3, FIXC0, FIXC1, FIXC2
+.IP \[bu]
+.B Intel Pentium M:
+Banias and Dothan variants. Counters:
+.I PMC0, PMC1
+.IP \[bu]
+.B Intel P6:
+Tested on P3.
+.IP \[bu]
+.B AMD K8:
+all variants. Counters:
+.I PMC0, PMC1, PMC2, PMC3
+.IP \[bu]
+.B AMD K10:
+Barcelona, Shanghai, Istanbul, MagnyCours based processors. Counters:
+.I PMC0, PMC1, PMC2, PMC3
+
+.SH OPTIONS
+.TP
+.B \-\^v
+prints version information to standard output, then exits.
+.TP
+.B \-\^h
+prints a help message to standard output, then exits.
+.TP
+.B \-\^H
+prints group help message (use together with -g switch).
+.TP
+.B \-\^V
+verbose output during execution for debugging.
+.TP
+.B \-\^m
+run in marker API mode
+.TP
+.B \-\^a
+print available performance groups for current processor, then exit.
+.TP
+.B \-\^e
+print available counters and performance events of current processor.
+.TP
+.B \-\^o
+store all output to a file instead of stdout. For the filename the following placeholders are supported:
+%j for PBS_JOBID, %r for MPI RANK (only Intel MPI at the moment), %h hostname and %p for process pid.
+The placeholders must be separated by underscore as, e.g., -o test_%h_%p. You must specify a suffix to
+the filename. For txt the output is printed as is to the file. Other suffixes trigger a filter on the output.
+Available filters are csv (comma separated values) and xml at the moment.
+.TP
+.B \-\^i
+print cpuid information about processor and on Intel Performance Monitoring features, then exit.
+.TP
+.B \-\^c " processor_list"
+specify a numerical list of processors. The list may contain multiple
+items, separated by comma, and ranges. For example 0,3,9-11.
+.TP
+.B \-\^C " processor_list"
+specify a numerical list of processors. The list may contain multiple
+items, separated by comma, and ranges. For example 0,3,9-11. This variant will
+also pin the threads to the cores. Also logical numberings can be used.
+.TP
+.B \-\^g " performance group or performance event set string"
+specify which performance group to measure. This can be one of the tags output with the -a flag.
+Also a custom event set can be specified by a comma separated list of events. Each event has the format
+eventId:register with the register being one of the architecture's supported performance counter registers.
+.TP
+.B \-\^d " frequency of measurements in seconds"
+timeline mode for time resolved measurements. The output has the format:
+.TP
+.B ...
+
+.SH EXAMPLE
+Because
+.B likwid-perfctr
+measures on processors and not single applications it is necessary to ensure
+that processes and threads are pinned to dedicated resources. You can either pin the application yourself
+or use the built-in pin functionality.
+.IP 1. 4
+As wrapper with performance group:
+.TP
+.B likwid-perfctr -C 0-2 -g TLB ./cacheBench -n 2 -l 1048576 -i 100 -t Stream
+.PP
+The parent process is pinned to processor 0, Thread 0 to processor 1 and Thread 1 to processor 2.
+.IP 2. 4
+As wrapper with custom event set on AMD:
+.TP
+.B likwid-perfctr -C 0-4 -g INSTRUCTIONS_RETIRED_SSE:PMC0,CPU_CLOCKS_UNHALTED:PMC3 ./cacheBench
+.PP
+It is specified that the event
+.B INSTRUCTIONS_RETIRED_SSE
+is measured on counter
+.B PMC0
+and the event
+.B CPU_CLOCKS_UNHALTED
+on counter
+.B PMC3.
+It is possible to calculate the runtime of all threads based on the
+.B CPU_CLOCKS_UNHALTED
+event. If you want this you have to include this event in your custom event string as shown above.
+
+.IP 3. 4
+As wrapper with custom event set on Intel:
+.TP
+.B likwid-perfctr -C 0 -g INSTR_RETIRED_ANY:FIXC0,CPU_CLK_UNHALTED_CORE:FIXC1,UNC_L3_LINES_IN_ANY:UPMC0 ./stream-icc
+.PP
+On Intel processors fixed events are measured on dedicated counters. These are
+.B INSTR_RETIRED_ANY
+and
+.B CPU_CLK_UNHALTED_CORE.
+If you configure these fixed counters,
+.B likwid-perfctr
+will calculate the runtime and CPI metrics for your run.
+
+.IP 4. 4
+Using the marker API to measure only parts of your code (this can be used both with groups or custom event sets):
+.TP
+.B likwid-perfctr -m -C 0-4 -g INSTRUCTIONS_RETIRED_SSE:PMC0,CPU_CLOCKS_UNHALTED:PMC3 ./cacheBench
+.PP
+You have to link you code against liblikwid.a and use the marker API calls.
+The following code snippet shows the necessary calls:
+
+.nf
+#include <likwid.h>
+
+/* only one thread calls init */
+ if (threadId == 0)
+ {
+ likwid_markerInit();
+ }
+ BARRIER;
+ likwid_markerStartRegion("Benchmark");
+ /* your code to be measured is here */
+
+ likwid_markerStopRegion("Benchmark");
+ BARRIER;
+ /* again only one thread can close the markers */
+ if (threadId == 0)
+ {
+ likwid_markerClose();
+ }
+.fi
+
+.IP 5. 4
+Using likwid in timeline mode:
+.TP
+.B likwid-perfctr -c 0-3 -g FLOPS_DP -d 300ms ./cacheBench > out.txt
+.PP
+This will read out the counters every 300ms on physical cores 0-3 and write the results to out.txt.
+For timeline mode there is a frontend application likwid-scope, which enables live plotting of selected events.
+For more code examples have a look at the likwid WIKI pages.
+
+.SH AUTHOR
+Written by Jan Treibig <jan.treibig@gmail.com>.
+.SH BUGS
+Report Bugs on <http://code.google.com/p/likwid>.
+.SH SEE ALSO
+likwid-topology(1), likwid-features(1), likwid-pin(1), likwid-bench(1)
diff --git a/doc/likwid-pin.1 b/doc/likwid-pin.1
new file mode 100644
index 000000000..9d95365a4
--- /dev/null
+++ b/doc/likwid-pin.1
@@ -0,0 +1,120 @@
+.TH LIKWID-PIN 1 likwid\-VERSION
+.SH NAME
+likwid-pin \- pin a sequential or threaded application to dedicated processors
+.SH SYNOPSIS
+.B likwid-pin
+.RB [\-vh]
+.RB [ \-c
+.IR corelist
+.RB [ \-s
+.IR skip_mask ]
+.RB [ \-S
+.IR Sweep memory before run]
+.RB [ \-p]
+.RB [ \-q]
+.RB [ \-i]
+.SH DESCRIPTION
+.B likwid-pin
+is a command line application to pin a sequential or multi threaded
+application to dedicated processors. It can be used as replacement for taskset.
+Opposite to taskset no affinity mask but single processors are specified.
+For multi threaded applications based on the pthread library the
+.I pthread_create
+library call is overloaded through a LD_PRELOAD and each created thread is pinned
+to a dedicated processor as specified in
+.I core_list .
+.PP
+Per default every generated thread is pinned to the core in the order of calls
+to pthread_create. It is possible to skip single threads.
+.PP
+For OpenMP implementations gcc and icc compilers are explicitly supported. Others may also work.
+.B likwid-pin
+sets the environment variable OMP_NUM_THREADS for you if not already present.
+It will set as many threads as present in the pin expression. Be aware that
+with pthreads the parent thread is always pinned. If you create for example 4
+threads with pthread_create and do not use the parent process as worker you
+still have to provide num_threads+1 processor ids.
+.PP
+.B likwid-pin
+supports different numberings for pinning. Per default physical numbering of
+the cores is used. This is the numbering also likwid-topology reports. But
+also logical numbering inside the node or the sockets can be used. If using
+with a N (e.g. -c N:0-6) the cores are logical numbered over the whole node.
+Physical cores come first. If a system e.g. has 8 cores with 16 SMT threads
+with -c N:0-7 you get all physical cores. If you specify -c N:0-15 you get all
+physical cores and all SMT threads. With S you can specify logical numberings
+inside sockets, again physical cores come first. You can mix different domains
+with a @. -c S0:0-3@S2:2-3 you pin thread 0-3 to logical cores 0-3 on socket 0
+and threads 4-5 on logical cores 2-3 on socket 2.
+.PP
+For applications where first touch policy on numa systems cannot be employed
+.B likwid-pin
+can be used to turn on interleave memory placement. This can significantly
+speed up the performance of memory bound multi threaded codes. All numa nodes
+the user pinned threads to are used for interleaving.
+
+.SH OPTIONS
+.TP
+.B \-\^v
+prints version information to standard output, then exits.
+.TP
+.B \-\^h
+prints a help message to standard output, then exits.
+.TP
+.B \-\^c " processor_list OR thread expression OR scatter policy "
+specify a numerical list of processors. The list may contain multiple
+items, separated by comma, and ranges. For example 0,3,9-11. You can also use
+logical numberings, either within a node (N), a socket (S) or a numa domain (M).
+likwid-pin also supports logical pinning within a cpuset with a L prefix. If you omit this option
+likwid-pin will pin the threads to the processors on the node with physical cores first.
+See below for details on using a thread expression or scatter policy
+.TP
+.B \-\^s " skip_mask
+Specify skip mask as HEX number. For each set bit the corresponding thread is skipped.
+.TP
+.B \-\^S " enable memory sweeper
+All ccNUMA memory domains belonging to the specified threadlist will be cleaned before the run. Can solve file buffer cache problems on Linux.
+.TP
+.B \-\^p
+prints the available thread domains for logical pinning
+.TP
+.B \-\^i
+set numa memory policy to interleave involving all numa nodes involved in pinning
+.TP
+.B \-\^q
+silent execution without output
+
+
+.SH EXAMPLE
+.IP 1. 4
+For standard pthread application:
+.TP
+.B likwid-pin -c 0,2,4-6 ./myApp
+.PP
+The parent process is pinned to processor 0. Thread 0 to processor 2, thread
+1 to processor 4, thread 2 to processor 5 and thread 3 to processor 6. If more threads
+are created than specified in the processor list, these threads are pinned to processor 0
+as fallback.
+.IP 2. 4
+For gcc OpenMP as many ids must be specified in processor list as there are threads:
+.TP
+.B OMP_NUM_THREADS=4; likwid-pin -c 0,2,1,3 ./myApp
+.IP 3. 4
+For Intel icc OpenMP the flag
+.B \-\^t
+.I intel
+must be set.
+.TP
+.B OMP_NUM_THREADS=4; likwid-pin -t intel -c S0:0,1@S1:0,1 ./myApp
+.IP 4. 4
+Full control over the pinning can be achieved by specifying a skip mask.
+For example above case for Intel OpenMP can also be achieved with:
+.TP
+.B OMP_NUM_THREADS=4; likwid-pin -s 0x1 -c 0,2,1,3 ./myApp
+
+.SH AUTHOR
+Written by Jan Treibig <jan.treibig@gmail.com>.
+.SH BUGS
+Report Bugs on <http://code.google.com/p/likwid>.
+.SH "SEE ALSO"
+taskset(1), likwid-perfctr(1), likwid-features(1), likwid-topology(1),
diff --git a/doc/likwid-powermeter.1 b/doc/likwid-powermeter.1
new file mode 100644
index 000000000..a05d52852
--- /dev/null
+++ b/doc/likwid-powermeter.1
@@ -0,0 +1,41 @@
+.TH LIKWID-POWERMETER 1 likwid\-
+.SH NAME
+likwid-powermeter \- A tool to print Power and Clocking information on Intel CPUS
+.SH SYNOPSIS
+.B likwid-powermeter
+.RB [ \-vh ]
+.RB [ \-c
+.IR socketId ]
+.RB [ \-s
+.IR duration in seconds ]
+.SH DESCRIPTION
+.B likwid-powermeter
+is a command line application to get the Energy consumption on Intel RAPL capable processors. Currently
+only Intel SandyBridge is supported. It also prints information about TDP and Turbo Mode steps supported.
+The Turbo Mode information works on all Turbo mode enabled Intel processors. The tool can be either used
+in stethoscope mode for a specified duration or as a wrapper to your application measuring your complete
+run. RAPL works on a per package (socket) base.
+Please note that the RAPL counters are also accessible as normal events within likwid-perfctr.
+.SH OPTIONS
+.TP
+.B \-\^v
+prints version information to standard output, then exits.
+.TP
+.B \-\^h
+prints a help message to standard output, then exits.
+.TP
+.B \-\^c " socketId"
+set on which socket the RAPL interface is accessed.
+.TP
+.B \-\^p
+prints out information about dynamic clocks and CPI information on the socket measured.
+.TP
+.B \-\^i
+prints out information TDP and Turbo mode steps
+
+.SH AUTHOR
+Written by Jan Treibig <jan.treibig@gmail.com>.
+.SH BUGS
+Report Bugs on <http://code.google.com/p/likwid>.
+.SH "SEE ALSO"
+likwid-topology(1), likwid-perfCtr(1), likwid-pin(1),
diff --git a/doc/likwid-topology.1 b/doc/likwid-topology.1
new file mode 100644
index 000000000..911943156
--- /dev/null
+++ b/doc/likwid-topology.1
@@ -0,0 +1,36 @@
+.TH LIKWID-TOPOLOGY 1 likwid\-
+.SH NAME
+likwid-topology \- print thread and cache topology
+.SH SYNOPSIS
+.B likwid-topology
+.RB [\-hvgcC]
+.SH DESCRIPTION
+.B likwid-topology
+is a command line application to print the thread and cache
+topology on multicore x86 processors. Used with mono spaced fonts it can
+draw the processor topology of a machine in ascii art. Beyond topology
+likwid-topology determines the clock of a processor and prints detailed
+information about the cache hierarchy.
+.SH OPTIONS
+.TP
+.B \-v
+prints version information to standard output, then exits.
+.TP
+.B \-h
+prints a help message to standard output, then exits.
+.TP
+.B \-g
+prints topology information in ascii art. Best viewed with monospaced font.
+.TP
+.B \-c
+prints detailed information about the cache hierarchy
+.TP
+.B \-C
+measures and outputs the processor clock. This involves a longer runtime of likwid-topology.
+
+.SH AUTHOR
+Written by Jan Treibig <jan.treibig@gmail.com>.
+.SH BUGS
+Report Bugs on <http://code.google.com/p/likwid>.
+.SH "SEE ALSO"
+likwid-perfCtr(1), likwid-features(1), likwid-pin(1),
diff --git a/ext/hwloc/AUTHORS b/ext/hwloc/AUTHORS
new file mode 100644
index 000000000..837b27f2c
--- /dev/null
+++ b/ext/hwloc/AUTHORS
@@ -0,0 +1,8 @@
+Cédric Augonnet
+Jérôme Clet-Ortega
+Ludovic Courtès
+Brice Goglin
+Nathalie Furmento
+Samuel Thibault
+Jeff Squyres
+Alexey Kardashevskiy
diff --git a/ext/hwloc/COPYING b/ext/hwloc/COPYING
new file mode 100644
index 000000000..32128c7f2
--- /dev/null
+++ b/ext/hwloc/COPYING
@@ -0,0 +1,28 @@
+Copyright © 2009 CNRS
+Copyright © 2009 inria. All rights reserved.
+Copyright © 2009 Université Bordeaux 1
+Copyright © 2009 Cisco Systems, Inc. All rights reserved.
+Copyright © 2012 Blue Brain Project, EPFL. All rights reserved.
+See COPYING in top-level directory.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+3. The name of the author may not be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/ext/hwloc/Makefile b/ext/hwloc/Makefile
new file mode 100644
index 000000000..45920e8ae
--- /dev/null
+++ b/ext/hwloc/Makefile
@@ -0,0 +1,53 @@
+SRC_DIRS = ./src
+MAKE_DIR = ../../make
+
+#DO NOT EDIT BELOW
+
+include ../../config.mk
+include $(MAKE_DIR)/include_$(COMPILER).mk
+
+CFLAGS = -O2 -Wall -fPIC
+INCLUDES = -I./include
+DEFINES =
+LIBS = -lm -Wl,-E
+LFLAGS =
+Q ?= @
+
+#CONFIGURE BUILD SYSTEM
+BUILD_DIR = ./$(COMPILER)
+
+VPATH = $(SRC_DIRS)
+FILES = $(notdir $(foreach dir,$(SRC_DIRS),$(wildcard $(dir)/*.c)))
+OBJ = $(patsubst %.c, $(BUILD_DIR)/%.o, $(FILES))
+
+LIBHWLOC = libhwloc.a
+
+CPPFLAGS := $(CPPFLAGS) $(DEFINES) $(INCLUDES)
+
+all: $(BUILD_DIR) $(OBJ) $(LIBHWLOC)
+
+$(BUILD_DIR):
+ @mkdir $(BUILD_DIR)
+
+$(LIBHWLOC):
+ $(Q)${AR} -cq $(LIBHWLOC) $(OBJ)
+
+#PATTERN RULES
+$(BUILD_DIR)/%.o: %.c
+ ${Q}$(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@
+ ${Q}$(CC) $(CPPFLAGS) -MT $(@:.d=.o) -MM $< > $(BUILD_DIR)/$*.d
+
+ifeq ($(findstring $(MAKECMDGOALS),clean),)
+-include $(OBJ:.o=.d)
+endif
+
+.PHONY: clean distclean
+
+clean:
+ @rm -rf $(BUILD_DIR) $(LIBHWLOC)
+
+distclean: clean
+ @rm -f $(TARGET)
+
+
+
diff --git a/ext/hwloc/include/hwloc.h b/ext/hwloc/include/hwloc.h
new file mode 100644
index 000000000..c4fda856f
--- /dev/null
+++ b/ext/hwloc/include/hwloc.h
@@ -0,0 +1,2258 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2013 Inria. All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux 1
+ * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/*=====================================================================
+ * PLEASE GO READ THE DOCUMENTATION!
+ * ------------------------------------------------
+ * $tarball_directory/doc/doxygen-doc/
+ * or
+ * http://www.open-mpi.org/projects/hwloc/doc/
+ *=====================================================================
+ *
+ * FAIR WARNING: Do NOT expect to be able to figure out all the
+ * subtleties of hwloc by simply reading function prototypes and
+ * constant descriptions here in this file.
+ *
+ * Hwloc has wonderful documentation in both PDF and HTML formats for
+ * your reading pleasure. The formal documentation explains a LOT of
+ * hwloc-specific concepts, provides definitions, and discusses the
+ * "big picture" for many of the things that you'll find here in this
+ * header file.
+ *
+ * The PDF/HTML documentation was generated via Doxygen; much of what
+ * you'll see in there is also here in this file. BUT THERE IS A LOT
+ * THAT IS IN THE PDF/HTML THAT IS ***NOT*** IN hwloc.h!
+ *
+ * There are entire paragraph-length descriptions, discussions, and
+ * pretty pictures to explain subtle corner cases, provide concrete
+ * examples, etc.
+ *
+ * Please, go read the documentation. :-)
+ *
+ *=====================================================================*/
+
+/** \file
+ * \brief The hwloc API.
+ *
+ * See hwloc/bitmap.h for bitmap specific macros.
+ * See hwloc/helper.h for high-level topology traversal helpers.
+ * See hwloc/inlines.h for the actual inline code of some functions below.
+ */
+
+#ifndef HWLOC_H
+#define HWLOC_H
+
+#include <hwloc/autogen/config.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+
+/*
+ * Symbol transforms
+ */
+#include <hwloc/rename.h>
+
+/*
+ * Bitmap definitions
+ */
+
+#include <hwloc/bitmap.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_api_version API version
+ * @{
+ */
+
+/** \brief Indicate at build time which hwloc API version is being used. */
+#define HWLOC_API_VERSION 0x00010800
+
+/** \brief Indicate at runtime which hwloc API version was used at build time. */
+HWLOC_DECLSPEC unsigned hwloc_get_api_version(void);
+
+/** \brief Current component and plugin ABI version (see hwloc/plugins.h) */
+#define HWLOC_COMPONENT_ABI 3
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_object_sets Object Sets (hwloc_cpuset_t and hwloc_nodeset_t)
+ *
+ * Hwloc uses bitmaps to represent two distinct kinds of object sets:
+ * CPU sets (::hwloc_cpuset_t) and NUMA node sets (::hwloc_nodeset_t).
+ * These types are both typedefs to a common back end type
+ * (::hwloc_bitmap_t), and therefore all the hwloc bitmap functions
+ * are applicable to both ::hwloc_cpuset_t and ::hwloc_nodeset_t (see
+ * \ref hwlocality_bitmap).
+ *
+ * The rationale for having two different types is that even though
+ * the actions one wants to perform on these types are the same (e.g.,
+ * enable and disable individual items in the set/mask), they're used
+ * in very different contexts: one for specifying which processors to
+ * use and one for specifying which NUMA nodes to use. Hence, the
+ * name difference is really just to reflect the intent of where the
+ * type is used.
+ *
+ * @{
+ */
+
+/** \brief A CPU set is a bitmap whose bits are set according to CPU
+ * physical OS indexes.
+ *
+ * It may be consulted and modified with the bitmap API as any
+ * ::hwloc_bitmap_t (see hwloc/bitmap.h).
+ */
+typedef hwloc_bitmap_t hwloc_cpuset_t;
+/** \brief A non-modifiable ::hwloc_cpuset_t. */
+typedef hwloc_const_bitmap_t hwloc_const_cpuset_t;
+
+/** \brief A node set is a bitmap whose bits are set according to NUMA
+ * memory node physical OS indexes.
+ *
+ * It may be consulted and modified with the bitmap API as any
+ * ::hwloc_bitmap_t (see hwloc/bitmap.h).
+ *
+ * When binding memory on a system without any NUMA node
+ * (when the whole memory is considered as a single memory bank),
+ * the nodeset may be either empty (no memory selected)
+ * or full (whole system memory selected).
+ *
+ * See also \ref hwlocality_helper_nodeset_convert.
+ */
+typedef hwloc_bitmap_t hwloc_nodeset_t;
+/** \brief A non-modifiable ::hwloc_nodeset_t.
+ */
+typedef hwloc_const_bitmap_t hwloc_const_nodeset_t;
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_object_types Object Types
+ * @{
+ */
+
+/** \brief Type of topology object.
+ *
+ * \note Do not rely on the ordering or completeness of the values as new ones
+ * may be defined in the future! If you need to compare types, use
+ * hwloc_compare_types() instead.
+ */
+typedef enum {
+ /* ***************************************************************
+ WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+
+ If new enum values are added here, you MUST also go update the
+ obj_type_order[] and obj_order_type[] arrays in src/topology.c.
+
+ WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+ *************************************************************** */
+
+ HWLOC_OBJ_SYSTEM, /**< \brief Whole system (may be a cluster of machines).
+ * The whole system that is accessible to hwloc.
+ * That may comprise several machines in SSI systems
+ * like Kerrighed.
+ */
+ HWLOC_OBJ_MACHINE, /**< \brief Machine.
+ * The typical root object type.
+ * A set of processors and memory with cache
+ * coherency.
+ */
+ HWLOC_OBJ_NODE, /**< \brief NUMA node.
+ * A set of processors around memory which the
+ * processors can directly access.
+ */
+ HWLOC_OBJ_SOCKET, /**< \brief Socket, physical package, or chip.
+ * In the physical meaning, i.e. that you can add
+ * or remove physically.
+ */
+ HWLOC_OBJ_CACHE, /**< \brief Cache.
+ * Can be L1i, L1d, L2, L3, ...
+ */
+ HWLOC_OBJ_CORE, /**< \brief Core.
+ * A computation unit (may be shared by several
+ * logical processors).
+ */
+ HWLOC_OBJ_PU, /**< \brief Processing Unit, or (Logical) Processor.
+ * An execution unit (may share a core with some
+ * other logical processors, e.g. in the case of
+ * an SMT core).
+ *
+ * Objects of this kind are always reported and can
+ * thus be used as fallback when others are not.
+ */
+
+ HWLOC_OBJ_GROUP, /**< \brief Group objects.
+ * Objects which do not fit in the above but are
+ * detected by hwloc and are useful to take into
+ * account for affinity. For instance, some operating systems
+ * expose their arbitrary processors aggregation this
+ * way. And hwloc may insert such objects to group
+ * NUMA nodes according to their distances.
+ *
+ * These objects are ignored when they do not bring
+ * any structure.
+ */
+
+ HWLOC_OBJ_MISC, /**< \brief Miscellaneous objects.
+ * Objects without particular meaning, that can e.g. be
+ * added by the application for its own use.
+ */
+
+ HWLOC_OBJ_BRIDGE, /**< \brief Bridge.
+ * Any bridge that connects the host or an I/O bus,
+ * to another I/O bus.
+ * Bridge objects have neither CPU sets nor node sets.
+ * They are not added to the topology unless I/O discovery
+ * is enabled with hwloc_topology_set_flags().
+ */
+ HWLOC_OBJ_PCI_DEVICE, /**< \brief PCI device.
+ * These objects have neither CPU sets nor node sets.
+ * They are not added to the topology unless I/O discovery
+ * is enabled with hwloc_topology_set_flags().
+ */
+ HWLOC_OBJ_OS_DEVICE, /**< \brief Operating system device.
+ * These objects have neither CPU sets nor node sets.
+ * They are not added to the topology unless I/O discovery
+ * is enabled with hwloc_topology_set_flags().
+ */
+
+ HWLOC_OBJ_TYPE_MAX /**< \private Sentinel value */
+
+ /* ***************************************************************
+ WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+
+ If new enum values are added here, you MUST also go update the
+ obj_type_order[] and obj_order_type[] arrays in src/topology.c.
+
+ WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
+ *************************************************************** */
+} hwloc_obj_type_t;
+
+/** \brief Cache type. */
+typedef enum hwloc_obj_cache_type_e {
+ HWLOC_OBJ_CACHE_UNIFIED, /**< \brief Unified cache. */
+ HWLOC_OBJ_CACHE_DATA, /**< \brief Data cache. */
+ HWLOC_OBJ_CACHE_INSTRUCTION /**< \brief Instruction cache.
+ * Only used when the HWLOC_TOPOLOGY_FLAG_ICACHES topology flag is set. */
+} hwloc_obj_cache_type_t;
+
+/** \brief Type of one side (upstream or downstream) of an I/O bridge.
+ *
+ * Stored in the \p upstream_type and \p downstream_type fields of
+ * struct hwloc_bridge_attr_s. */
+typedef enum hwloc_obj_bridge_type_e {
+  HWLOC_OBJ_BRIDGE_HOST, /**< \brief Host-side of a bridge, only possible upstream. */
+  HWLOC_OBJ_BRIDGE_PCI /**< \brief PCI-side of a bridge. */
+} hwloc_obj_bridge_type_t;
+
+/** \brief Type of an OS device.
+ *
+ * Stored in the \p type field of struct hwloc_osdev_attr_s. */
+typedef enum hwloc_obj_osdev_type_e {
+  HWLOC_OBJ_OSDEV_BLOCK, /**< \brief Operating system block device.
+ * For instance "sda" on Linux. */
+  HWLOC_OBJ_OSDEV_GPU, /**< \brief Operating system GPU device.
+ * For instance ":0.0" for a GL display,
+ * "card0" for a Linux DRM device. */
+  HWLOC_OBJ_OSDEV_NETWORK, /**< \brief Operating system network device.
+ * For instance the "eth0" interface on Linux. */
+  HWLOC_OBJ_OSDEV_OPENFABRICS, /**< \brief Operating system openfabrics device.
+ * For instance the "mlx4_0" InfiniBand HCA device on Linux. */
+  HWLOC_OBJ_OSDEV_DMA, /**< \brief Operating system dma engine device.
+ * For instance the "dma0chan0" DMA channel on Linux. */
+  HWLOC_OBJ_OSDEV_COPROC /**< \brief Operating system co-processor device.
+ * For instance "mic0" for a Xeon Phi (MIC) on Linux,
+ * "opencl0d0" for an OpenCL device,
+ * "cuda0" for a CUDA device. */
+} hwloc_obj_osdev_type_t;
+
+/** \brief Compare the depth of two object types
+ *
+ * Types shouldn't be compared as they are, since newer ones may be added in
+ * the future. This function returns less than, equal to, or greater than zero
+ * respectively if \p type1 objects usually include \p type2 objects, are the
+ * same as \p type2 objects, or are included in \p type2 objects. If the types
+ * can not be compared (because neither is usually contained in the other),
+ * HWLOC_TYPE_UNORDERED is returned. Object types containing CPUs can always
+ * be compared (usually, a system contains machines which contain nodes which
+ * contain sockets which contain caches, which contain cores, which contain
+ * processors).
+ *
+ * \note HWLOC_OBJ_PU will always be the deepest.
+ * \note This does not mean that the actual topology will respect that order:
+ * e.g. as of today cores may also contain caches, and sockets may also contain
+ * nodes. This is thus just to be seen as a fallback comparison method.
+ */
+HWLOC_DECLSPEC int hwloc_compare_types (hwloc_obj_type_t type1, hwloc_obj_type_t type2) __hwloc_attribute_const;
+
+/** \brief Special return values of hwloc_compare_types(). */
+enum hwloc_compare_types_e {
+    HWLOC_TYPE_UNORDERED = INT_MAX /**< \brief Value returned by hwloc_compare_types when types can not be compared. \hideinitializer */
+};
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_objects Object Structure and Attributes
+ * @{
+ */
+
+union hwloc_obj_attr_u;
+
+/** \brief Object memory */
+struct hwloc_obj_memory_s {
+  hwloc_uint64_t total_memory; /**< \brief Total memory (in bytes) in this object and its children */
+  hwloc_uint64_t local_memory; /**< \brief Local memory (in bytes) */
+
+  /** \brief Size of array \p page_types */
+  unsigned page_types_len;
+  /** \brief Array of local memory page types, \c NULL if no local memory and \p page_types_len is 0.
+   *
+   * The array is sorted by increasing \p size fields.
+   * It contains \p page_types_len slots.
+   */
+  struct hwloc_obj_memory_page_type_s {
+    hwloc_uint64_t size;	/**< \brief Size of pages */
+    hwloc_uint64_t count;	/**< \brief Number of pages of this size */
+  } * page_types;
+};
+
+/** \brief Structure of a topology object
+ *
+ * Applications must not modify any field except hwloc_obj.userdata.
+ */
+struct hwloc_obj {
+  /* physical information */
+  hwloc_obj_type_t type;	/**< \brief Type of object */
+  unsigned os_index;		/**< \brief OS-provided physical index number */
+  char *name;			/**< \brief Object description if any */
+
+  struct hwloc_obj_memory_s memory; /**< \brief Memory attributes */
+
+  union hwloc_obj_attr_u *attr;	/**< \brief Object type-specific Attributes,
+				 * may be \c NULL if no attribute value was found */
+
+  /* global position */
+  unsigned depth;		/**< \brief Vertical index in the hierarchy.
+				 * If the topology is symmetric, this is equal to the
+				 * parent depth plus one, and also equal to the number
+				 * of parent/child links from the root object to here.
+				 */
+  unsigned logical_index;	/**< \brief Horizontal index in the whole list of similar objects,
+				 * could be a "cousin_rank" since it's the rank within the "cousin" list below */
+  signed os_level;		/**< \brief OS-provided physical level, -1 if unknown or meaningless */
+
+  /* cousins are all objects of the same type (and depth) across the entire topology */
+  struct hwloc_obj *next_cousin;	/**< \brief Next object of same type and depth */
+  struct hwloc_obj *prev_cousin;	/**< \brief Previous object of same type and depth */
+
+  /* children of the same parent are siblings, even if they may have different type and depth */
+  struct hwloc_obj *parent;	/**< \brief Parent, \c NULL if root (system object) */
+  unsigned sibling_rank;	/**< \brief Index in parent's \c children[] array */
+  struct hwloc_obj *next_sibling; /**< \brief Next object below the same parent */
+  struct hwloc_obj *prev_sibling; /**< \brief Previous object below the same parent */
+
+  /* children array below this object */
+  unsigned arity;		/**< \brief Number of children */
+  struct hwloc_obj **children;	/**< \brief Children, \c children[0 .. arity -1] */
+  struct hwloc_obj *first_child; /**< \brief First child */
+  struct hwloc_obj *last_child; /**< \brief Last child */
+
+  /* misc */
+  void *userdata;		/**< \brief Application-given private data pointer,
+				 * initialized to \c NULL, use it as you wish.
+				 * See hwloc_topology_set_userdata_export_callback()
+				 * if you wish to export this field to XML. */
+
+  /* cpusets and nodesets */
+  hwloc_cpuset_t cpuset;	/**< \brief CPUs covered by this object
+                                  *
+                                  * This is the set of CPUs for which there are PU objects in the topology
+                                  * under this object, i.e. which are known to be physically contained in this
+                                  * object and known how (the children path between this object and the PU
+                                  * objects).
+                                  *
+                                  * If the HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set, some of
+                                  * these CPUs may be offline, or not allowed for binding, see online_cpuset
+                                  * and allowed_cpuset.
+                                  *
+                                  * \note Its value must not be changed, hwloc_bitmap_dup must be used instead.
+                                  */
+  hwloc_cpuset_t complete_cpuset; /**< \brief The complete CPU set of logical processors of this object.
+                                  *
+                                  * This includes not only the same as the cpuset field, but also the CPUs for
+                                  * which topology information is unknown or incomplete, and the CPUs that are
+                                  * ignored when the HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM flag is not set.
+                                  * Thus no corresponding PU object may be found in the topology, because the
+                                  * precise position is undefined. It is however known that it would be somewhere
+                                  * under this object.
+                                  *
+                                  * \note Its value must not be changed, hwloc_bitmap_dup must be used instead.
+                                  */
+  hwloc_cpuset_t online_cpuset; /**< \brief The CPU set of online logical processors
+                                  *
+                                  * This includes the CPUs contained in this object that are online, i.e. draw
+                                  * power and can execute threads. It may however not be allowed to bind to
+                                  * them due to administration rules, see allowed_cpuset.
+                                  *
+                                  * \note Its value must not be changed, hwloc_bitmap_dup must be used instead.
+                                  */
+  hwloc_cpuset_t allowed_cpuset; /**< \brief The CPU set of allowed logical processors
+                                  *
+                                  * This includes the CPUs contained in this object which are allowed for
+                                  * binding, i.e. passing them to the hwloc binding functions should not return
+                                  * permission errors. This is usually restricted by administration rules.
+                                  * Some of them may however be offline so binding to them may still not be
+                                  * possible, see online_cpuset.
+                                  *
+                                  * \note Its value must not be changed, hwloc_bitmap_dup must be used instead.
+                                  */
+
+  hwloc_nodeset_t nodeset;      /**< \brief NUMA nodes covered by this object or containing this object
+                                  *
+                                  * This is the set of NUMA nodes for which there are NODE objects in the
+                                  * topology under or above this object, i.e. which are known to be physically
+                                  * contained in this object or containing it and known how (the children path
+                                  * between this object and the NODE objects).
+                                  *
+                                  * In the end, these nodes are those that are close to the current object.
+                                  *
+                                  * If the HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM configuration flag is set, some of
+                                  * these nodes may not be allowed for allocation, see allowed_nodeset.
+                                  *
+                                  * If there are no NUMA nodes in the machine, all the memory is close to this
+                                  * object, so \p nodeset is full.
+                                  *
+                                  * \note Its value must not be changed, hwloc_bitmap_dup must be used instead.
+                                  */
+  hwloc_nodeset_t complete_nodeset; /**< \brief The complete NUMA node set of this object.
+                                  *
+                                  * This includes not only the same as the nodeset field, but also the NUMA
+                                  * nodes for which topology information is unknown or incomplete, and the nodes
+                                  * that are ignored when the HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM flag is not set.
+                                  * Thus no corresponding NODE object may be found in the topology, because the
+                                  * precise position is undefined. It is however known that it would be
+                                  * somewhere under this object.
+                                  *
+                                  * If there are no NUMA nodes in the machine, all the memory is close to this
+                                  * object, so \p complete_nodeset is full.
+                                  *
+                                  * \note Its value must not be changed, hwloc_bitmap_dup must be used instead.
+                                  */
+  hwloc_nodeset_t allowed_nodeset; /**< \brief The set of allowed NUMA memory nodes
+                                  *
+                                  * This includes the NUMA memory nodes contained in this object which are
+                                  * allowed for memory allocation, i.e. passing them to NUMA node-directed
+                                  * memory allocation should not return permission errors. This is usually
+                                  * restricted by administration rules.
+                                  *
+                                  * If there are no NUMA nodes in the machine, all the memory is close to this
+                                  * object, so \p allowed_nodeset is full.
+                                  *
+                                  * \note Its value must not be changed, hwloc_bitmap_dup must be used instead.
+                                  */
+
+  struct hwloc_distances_s **distances; /**< \brief Distances between all objects at same depth below this object */
+  unsigned distances_count;	/**< \brief Size of the \p distances array. */
+
+  struct hwloc_obj_info_s *infos; /**< \brief Array of stringified info type=name. */
+  unsigned infos_count;		/**< \brief Size of infos array. */
+
+  int symmetric_subtree;	/**< \brief Set if the subtree of objects below this object is symmetric,
+				  * which means all children and their children have identical subtrees.
+				  * If set in the topology root object, lstopo may export the topology
+				  * as a synthetic string.
+				  */
+};
+/**
+ * \brief Convenience typedef; a pointer to a struct hwloc_obj.
+ */
+typedef struct hwloc_obj * hwloc_obj_t;
+
+/** \brief Object type-specific Attributes */
+union hwloc_obj_attr_u {
+  /** \brief Cache-specific Object Attributes */
+  struct hwloc_cache_attr_s {
+    hwloc_uint64_t size;		  /**< \brief Size of cache in bytes */
+    unsigned depth;			  /**< \brief Depth of cache (e.g., L1, L2, ...etc.) */
+    unsigned linesize;			  /**< \brief Cache-line size in bytes. 0 if unknown */
+    int associativity;			  /**< \brief Ways of associativity,
+    					    *  -1 if fully associative, 0 if unknown */
+    hwloc_obj_cache_type_t type;          /**< \brief Cache type */
+  } cache;
+  /** \brief Group-specific Object Attributes */
+  struct hwloc_group_attr_s {
+    unsigned depth;			  /**< \brief Depth of group object */
+  } group;
+  /** \brief PCI Device specific Object Attributes */
+  struct hwloc_pcidev_attr_s {
+    unsigned short domain;		  /**< \brief PCI domain number. */
+    unsigned char bus, dev, func;	  /**< \brief PCI bus, device and function numbers. */
+    unsigned short class_id;		  /**< \brief PCI device class. */
+    unsigned short vendor_id, device_id, subvendor_id, subdevice_id; /**< \brief PCI vendor, device, sub-vendor and sub-device IDs. */
+    unsigned char revision;		  /**< \brief PCI revision. */
+    float linkspeed; /* in GB/s */
+  } pcidev;
+  /** \brief Bridge specific Object Attributes */
+  struct hwloc_bridge_attr_s {
+    union {
+      struct hwloc_pcidev_attr_s pci;	  /**< \brief PCI attributes of the upstream side, valid when upstream_type is HWLOC_OBJ_BRIDGE_PCI. */
+    } upstream;
+    hwloc_obj_bridge_type_t upstream_type; /**< \brief Type of the upstream side of the bridge. */
+    union {
+      struct {
+	unsigned short domain;		  /**< \brief PCI domain number. */
+	unsigned char secondary_bus, subordinate_bus; /**< \brief Secondary and subordinate bus numbers behind the bridge. */
+      } pci;
+    } downstream;
+    hwloc_obj_bridge_type_t downstream_type; /**< \brief Type of the downstream side of the bridge. */
+    unsigned depth;
+  } bridge;
+  /** \brief OS Device specific Object Attributes */
+  struct hwloc_osdev_attr_s {
+    hwloc_obj_osdev_type_t type;	  /**< \brief OS device type. */
+  } osdev;
+};
+
+/** \brief Distances between objects
+ *
+ * One object may contain a distance structure describing distances
+ * between all its descendants at a given relative depth. If the
+ * containing object is the root object of the topology, then the
+ * distances are available for all objects in the machine.
+ *
+ * Distance structures are attached to objects through the
+ * hwloc_obj::distances array.
+ *
+ * If the \p latency pointer is not \c NULL, the pointed array contains
+ * memory latencies (non-zero values), as defined by the ACPI SLIT
+ * specification.
+ *
+ * In the future, some other types of distances may be considered.
+ * In these cases, \p latency may be \c NULL.
+ */
+struct hwloc_distances_s {
+  unsigned relative_depth;	/**< \brief Relative depth of the considered objects
+				 * below the object containing this distance information. */
+  unsigned nbobjs;		/**< \brief Number of objects considered in the matrix.
+				 * It is the number of descendant objects at \p relative_depth
+				 * below the containing object.
+				 * It corresponds to the result of hwloc_get_nbobjs_inside_cpuset_by_depth. */
+
+  float *latency;		/**< \brief Matrix of latencies between objects, stored as a one-dimension array.
+				 * May be \c NULL if the distances considered here are not latencies.
+				 * Values are normalized to get 1.0 as the minimal value in the matrix.
+				 * Latency from i-th to j-th object is stored in slot i*nbobjs+j.
+				 */
+  float latency_max;		/**< \brief The maximal value in the latency matrix. */
+  float latency_base;		/**< \brief The multiplier that should be applied to latency matrix
+				 * to retrieve the original OS-provided latencies.
+				 * Usually 10 on Linux since ACPI SLIT uses 10 for local latency.
+				 */
+};
+
+/** \brief Object info, stored in the hwloc_obj::infos array as name=value pairs. */
+struct hwloc_obj_info_s {
+  char *name;	/**< \brief Info name */
+  char *value;	/**< \brief Info value */
+};
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_creation Topology Creation and Destruction
+ * @{
+ */
+
+struct hwloc_topology;
+/** \brief Topology context
+ *
+ * To be initialized with hwloc_topology_init() and built with hwloc_topology_load().
+ */
+typedef struct hwloc_topology * hwloc_topology_t;
+
+/** \brief Allocate a topology context.
+ *
+ * \param[out] topologyp is assigned a pointer to the new allocated context.
+ *
+ * \return 0 on success, -1 on error.
+ */
+HWLOC_DECLSPEC int hwloc_topology_init (hwloc_topology_t *topologyp);
+
+/** \brief Build the actual topology
+ *
+ * Build the actual topology once initialized with hwloc_topology_init() and
+ * tuned with \ref hwlocality_configuration routines.
+ * No other routine may be called earlier using this topology context.
+ *
+ * \param topology is the topology to be loaded with objects.
+ *
+ * \return 0 on success, -1 on error.
+ *
+ * \note On failure, the topology is reinitialized. It should be either
+ * destroyed with hwloc_topology_destroy() or configured and loaded again.
+ *
+ * \note This function may be called only once per topology.
+ *
+ * \sa hwlocality_configuration
+ */
+HWLOC_DECLSPEC int hwloc_topology_load(hwloc_topology_t topology);
+
+/** \brief Terminate and free a topology context
+ *
+ * \param topology is the topology to be freed
+ */
+HWLOC_DECLSPEC void hwloc_topology_destroy (hwloc_topology_t topology);
+
+/** \brief Run internal checks on a topology structure
+ *
+ * The program aborts if an inconsistency is detected in the given topology.
+ *
+ * \param topology is the topology to be checked
+ *
+ * \note This routine is only useful to developers.
+ *
+ * \note The input topology should have been previously loaded with
+ * hwloc_topology_load().
+ */
+HWLOC_DECLSPEC void hwloc_topology_check(hwloc_topology_t topology);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_configuration Topology Detection Configuration and Query
+ *
+ * Several functions can optionally be called between hwloc_topology_init() and
+ * hwloc_topology_load() to configure how the detection should be performed,
+ * e.g. to ignore some objects types, define a synthetic topology, etc.
+ *
+ * If none of them is called, the default is to detect all the objects of the
+ * machine that the caller is allowed to access.
+ *
+ * This default behavior may also be modified through environment variables
+ * if the application did not modify it already.
+ * Setting HWLOC_XMLFILE in the environment enforces the discovery from a XML
+ * file as if hwloc_topology_set_xml() had been called.
+ * HWLOC_FSROOT switches to reading the topology from the specified Linux
+ * filesystem root as if hwloc_topology_set_fsroot() had been called.
+ * Finally, HWLOC_THISSYSTEM enforces the return value of
+ * hwloc_topology_is_thissystem().
+ *
+ * @{
+ */
+
+/** \brief Ignore an object type.
+ *
+ * Ignore all objects from the given type.
+ * The bottom-level type HWLOC_OBJ_PU may not be ignored.
+ * The top-level object of the hierarchy will never be ignored, even if this function
+ * succeeds.
+ * I/O objects may not be ignored, topology flags should be used to configure
+ * their discovery instead.
+ */
+HWLOC_DECLSPEC int hwloc_topology_ignore_type(hwloc_topology_t topology, hwloc_obj_type_t type);
+
+/** \brief Ignore an object type if it does not bring any structure.
+ *
+ * Ignore all objects from the given type as long as they do not bring any structure:
+ * Each ignored object should have a single child or be the only child of its parent.
+ * The bottom-level type HWLOC_OBJ_PU may not be ignored.
+ * I/O objects may not be ignored, topology flags should be used to configure
+ * their discovery instead.
+ */
+HWLOC_DECLSPEC int hwloc_topology_ignore_type_keep_structure(hwloc_topology_t topology, hwloc_obj_type_t type);
+
+/** \brief Ignore all objects that do not bring any structure.
+ *
+ * Ignore all objects that do not bring any structure:
+ * Each ignored object should have a single child or be the only child of its parent.
+ * I/O objects may not be ignored, topology flags should be used to configure
+ * their discovery instead.
+ */
+HWLOC_DECLSPEC int hwloc_topology_ignore_all_keep_structure(hwloc_topology_t topology);
+
+/** \brief Flags to be set onto a topology context before load.
+ *
+ * Flags should be given to hwloc_topology_set_flags().
+ * They may also be returned by hwloc_topology_get_flags().
+ */
+enum hwloc_topology_flags_e {
+  /** \brief Detect the whole system, ignore reservations and offline settings.
+   *
+   * Gather all resources, even if some were disabled by the administrator.
+   * For instance, ignore Linux Cpusets and gather all processors and memory nodes,
+   * and ignore the fact that some resources may be offline.
+   * \hideinitializer
+   */
+  HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM = (1UL<<0),
+
+  /** \brief Assume that the selected backend provides the topology for the
+   * system on which we are running.
+   *
+   * This forces hwloc_topology_is_thissystem to return 1, i.e. makes hwloc assume that
+   * the selected backend provides the topology for the system on which we are running,
+   * even if it is not the OS-specific backend but the XML backend for instance.
+   * This means making the binding functions actually call the OS-specific
+   * system calls and really do binding, while the XML backend would otherwise
+   * provide empty hooks just returning success.
+   *
+   * Setting the environment variable HWLOC_THISSYSTEM may also result in the
+   * same behavior.
+   *
+   * This can be used for efficiency reasons to first detect the topology once,
+   * save it to an XML file, and quickly reload it later through the XML
+   * backend, but still having binding functions actually do bind.
+   * \hideinitializer
+   */
+  HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM = (1UL<<1),
+
+  /** \brief Detect PCI devices.
+   *
+   * By default, I/O devices are ignored. This flag enables I/O device
+   * detection using the pci backend. Only the common PCI devices (GPUs,
+   * NICs, block devices, ...) and host bridges (objects that connect the host
+   * objects to an I/O subsystem) will be added to the topology.
+   * Uncommon devices and other bridges (such as PCI-to-PCI bridges) will be
+   * ignored.
+   * \hideinitializer
+   */
+  HWLOC_TOPOLOGY_FLAG_IO_DEVICES = (1UL<<2),
+
+  /** \brief Detect PCI bridges.
+   *
+   * This flag should be combined with HWLOC_TOPOLOGY_FLAG_IO_DEVICES to enable
+   * the detection of both common devices and of all useful bridges (bridges that
+   * have at least one device behind them).
+   * \hideinitializer
+   */
+  HWLOC_TOPOLOGY_FLAG_IO_BRIDGES = (1UL<<3),
+
+  /** \brief Detect the whole PCI hierarchy.
+   *
+   * This flag enables detection of all I/O devices (even the uncommon ones)
+   * and bridges (even those that have no device behind them) using the pci
+   * backend.
+   * \hideinitializer
+   */
+  HWLOC_TOPOLOGY_FLAG_WHOLE_IO = (1UL<<4),
+
+  /** \brief Detect instruction caches.
+   *
+   * This flag enables detection of Instruction caches
+   * (cache objects of type HWLOC_OBJ_CACHE_INSTRUCTION),
+   * instead of only Data and Unified caches.
+   * \hideinitializer
+   */
+  HWLOC_TOPOLOGY_FLAG_ICACHES = (1UL<<5)
+};
+
+/** \brief Set OR'ed flags to non-yet-loaded topology.
+ *
+ * Set a OR'ed set of ::hwloc_topology_flags_e onto a topology that was not yet loaded.
+ *
+ * If this function is called multiple times, the last invocation will erase
+ * and replace the set of flags that was previously set.
+ *
+ * The flags set in a topology may be retrieved with hwloc_topology_get_flags()
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_flags (hwloc_topology_t topology, unsigned long flags);
+
+/** \brief Get OR'ed flags of a topology.
+ *
+ * Get the OR'ed set of ::hwloc_topology_flags_e of a topology.
+ *
+ * \return the flags previously set with hwloc_topology_set_flags().
+ */
+HWLOC_DECLSPEC unsigned long hwloc_topology_get_flags (hwloc_topology_t topology);
+
+/** \brief Change which pid the topology is viewed from
+ *
+ * On some systems, processes may have different views of the machine, for
+ * instance the set of allowed CPUs. By default, hwloc exposes the view from
+ * the current process. Calling hwloc_topology_set_pid() permits to make it
+ * expose the topology of the machine from the point of view of another
+ * process.
+ *
+ * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
+ *
+ * \note -1 is returned and errno is set to ENOSYS on platforms that do not
+ * support this feature.
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_pid(hwloc_topology_t __hwloc_restrict topology, hwloc_pid_t pid);
+
+/** \brief Change the file-system root path when building the topology from sysfs/procfs.
+ *
+ * On Linux system, use sysfs and procfs files as if they were mounted on the given
+ * \p fsroot_path instead of the main file-system root. Setting the environment
+ * variable HWLOC_FSROOT may also result in this behavior.
+ * Not using the main file-system root causes hwloc_topology_is_thissystem()
+ * to return 0.
+ *
+ * Note that this function does not actually load topology
+ * information; it just tells hwloc where to load it from. You'll
+ * still need to invoke hwloc_topology_load() to actually load the
+ * topology information.
+ *
+ * \return -1 with errno set to ENOSYS on non-Linux and on Linux systems that
+ * do not support it.
+ * \return -1 with the appropriate errno if \p fsroot_path cannot be used.
+ *
+ * \note For convenience, this backend provides empty binding hooks which just
+ * return success. To have hwloc still actually call OS-specific hooks, the
+ * HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded
+ * file is really the underlying system.
+ *
+ * \note On success, the Linux component replaces the previously enabled
+ * component (if any), but the topology is not actually modified until
+ * hwloc_topology_load().
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_fsroot(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict fsroot_path);
+
+/** \brief Enable synthetic topology.
+ *
+ * Gather topology information from the given \p description,
+ * a space-separated string of numbers describing
+ * the arity of each level.
+ * Each number may be prefixed with a type and a colon to enforce the type
+ * of a level. If only some level types are enforced, hwloc will try to
+ * choose the other types according to usual topologies, but it may fail
+ * and you may have to specify more level types manually.
+ * See also the \ref synthetic.
+ *
+ * If \p description was properly parsed and describes a valid topology
+ * configuration, this function returns 0.
+ * Otherwise -1 is returned and errno is set to EINVAL.
+ *
+ * Note that this function does not actually load topology
+ * information; it just tells hwloc where to load it from. You'll
+ * still need to invoke hwloc_topology_load() to actually load the
+ * topology information.
+ *
+ * \note For convenience, this backend provides empty binding hooks which just
+ * return success.
+ *
+ * \note On success, the synthetic component replaces the previously enabled
+ * component (if any), but the topology is not actually modified until
+ * hwloc_topology_load().
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_synthetic(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict description);
+
+/** \brief Enable XML-file based topology.
+ *
+ * Gather topology information from the XML file given at \p xmlpath.
+ * Setting the environment variable HWLOC_XMLFILE may also result in this behavior.
+ * This file may have been generated earlier with hwloc_topology_export_xml()
+ * or lstopo file.xml.
+ *
+ * Note that this function does not actually load topology
+ * information; it just tells hwloc where to load it from. You'll
+ * still need to invoke hwloc_topology_load() to actually load the
+ * topology information.
+ *
+ * \return -1 with errno set to EINVAL on failure to read the XML file.
+ *
+ * \note See also hwloc_topology_set_userdata_import_callback()
+ * for importing application-specific userdata.
+ *
+ * \note For convenience, this backend provides empty binding hooks which just
+ * return success. To have hwloc still actually call OS-specific hooks, the
+ * HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded
+ * file is really the underlying system.
+ *
+ * \note On success, the XML component replaces the previously enabled
+ * component (if any), but the topology is not actually modified until
+ * hwloc_topology_load().
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_xml(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict xmlpath);
+
+/** \brief Enable XML based topology using a memory buffer (instead of
+ * a file, as with hwloc_topology_set_xml()).
+ *
+ * Gather topology information from the XML memory buffer given at \p
+ * buffer and of length \p size. This buffer may have been filled
+ * earlier with hwloc_topology_export_xmlbuffer().
+ *
+ * Note that this function does not actually load topology
+ * information; it just tells hwloc where to load it from. You'll
+ * still need to invoke hwloc_topology_load() to actually load the
+ * topology information.
+ *
+ * \return -1 with errno set to EINVAL on failure to read the XML buffer.
+ *
+ * \note See also hwloc_topology_set_userdata_import_callback()
+ * for importing application-specific userdata.
+ *
+ * \note For convenience, this backend provides empty binding hooks which just
+ * return success. To have hwloc still actually call OS-specific hooks, the
+ * HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM has to be set to assert that the loaded
+ * file is really the underlying system.
+ *
+ * \note On success, the XML component replaces the previously enabled
+ * component (if any), but the topology is not actually modified until
+ * hwloc_topology_load().
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_xmlbuffer(hwloc_topology_t __hwloc_restrict topology, const char * __hwloc_restrict buffer, int size);
+
+/** \brief Prepare the topology for custom assembly.
+ *
+ * The topology then contains a single root object.
+ * It must then be built by inserting other topologies with
+ * hwloc_custom_insert_topology() or single objects with
+ * hwloc_custom_insert_group_object_by_parent().
+ * hwloc_topology_load() must be called to finalize the new
+ * topology as usual.
+ *
+ * \note If nothing is inserted in the topology,
+ * hwloc_topology_load() will fail with errno set to EINVAL.
+ *
+ * \note The cpuset and nodeset of the root object are NULL because
+ * these sets are meaningless when assembling multiple topologies.
+ *
+ * \note On success, the custom component replaces the previously enabled
+ * component (if any), but the topology is not actually modified until
+ * hwloc_topology_load().
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_custom(hwloc_topology_t topology);
+
+/** \brief Provide a distance matrix.
+ *
+ * Provide the matrix of distances between a set of objects of the given type.
+ * The set may or may not contain all the existing objects of this type.
+ * The objects are specified by their OS/physical index in the \p os_index
+ * array. The \p distances matrix follows the same order.
+ * The distance from object i to object j is stored in slot i*nbobjs+j.
+ *
+ * A single latency matrix may be defined for each type.
+ * If another distance matrix already exists for the given type,
+ * either because the user specified it or because the OS offers it,
+ * it will be replaced by the given one.
+ * If \p nbobjs is \c 0, \p os_index is \c NULL and \p distances is \c NULL,
+ * the existing distance matrix for the given type is removed.
+ *
+ * \note Distance matrices are ignored in multi-node topologies.
+ */
+HWLOC_DECLSPEC int hwloc_topology_set_distance_matrix(hwloc_topology_t __hwloc_restrict topology,
+ hwloc_obj_type_t type, unsigned nbobjs,
+ unsigned *os_index, float *distances);
+
+/** \brief Does the topology context come from this system?
+ *
+ * \return 1 if this topology context was built using the system
+ * running this program.
+ * \return 0 instead (for instance if using another file-system root,
+ * a XML topology file, or a synthetic topology).
+ */
+HWLOC_DECLSPEC int hwloc_topology_is_thissystem(hwloc_topology_t __hwloc_restrict topology) __hwloc_attribute_pure;
+
+/** \brief Flags describing actual discovery support for this topology. */
+struct hwloc_topology_discovery_support {
+  /** \brief Detecting the number of PU objects is supported. */
+  unsigned char pu;
+};
+
+/** \brief Flags describing actual PU binding support for this topology. */
+struct hwloc_topology_cpubind_support {
+  /** CPU binding: binding the whole current process is supported. */
+  unsigned char set_thisproc_cpubind;
+  /** Getting the binding of the whole current process is supported. */
+  unsigned char get_thisproc_cpubind;
+  /** Binding a whole given process is supported. */
+  unsigned char set_proc_cpubind;
+  /** Getting the binding of a whole given process is supported. */
+  unsigned char get_proc_cpubind;
+  /** Binding the current thread only is supported. */
+  unsigned char set_thisthread_cpubind;
+  /** Getting the binding of the current thread only is supported. */
+  unsigned char get_thisthread_cpubind;
+  /** Binding a given thread only is supported. */
+  unsigned char set_thread_cpubind;
+  /** Getting the binding of a given thread only is supported. */
+  unsigned char get_thread_cpubind;
+  /** Getting the last processors where the whole current process ran is supported. */
+  unsigned char get_thisproc_last_cpu_location;
+  /** Getting the last processors where a whole process ran is supported. */
+  unsigned char get_proc_last_cpu_location;
+  /** Getting the last processors where the current thread ran is supported. */
+  unsigned char get_thisthread_last_cpu_location;
+};
+
+/** \brief Flags describing actual memory binding support for this topology. */
+struct hwloc_topology_membind_support {
+  /** Memory binding: binding the whole current process is supported. */
+  unsigned char set_thisproc_membind;
+  /** Getting the binding of the whole current process is supported. */
+  unsigned char get_thisproc_membind;
+  /** Binding a whole given process is supported. */
+  unsigned char set_proc_membind;
+  /** Getting the binding of a whole given process is supported. */
+  unsigned char get_proc_membind;
+  /** Binding the current thread only is supported. */
+  unsigned char set_thisthread_membind;
+  /** Getting the binding of the current thread only is supported. */
+  unsigned char get_thisthread_membind;
+  /** Binding a given memory area is supported. */
+  unsigned char set_area_membind;
+  /** Getting the binding of a given memory area is supported. */
+  unsigned char get_area_membind;
+  /** Allocating a bound memory area is supported. */
+  unsigned char alloc_membind;
+  /** First-touch policy is supported. */
+  unsigned char firsttouch_membind;
+  /** Bind policy is supported. */
+  unsigned char bind_membind;
+  /** Interleave policy is supported. */
+  unsigned char interleave_membind;
+  /** Replication policy is supported. */
+  unsigned char replicate_membind;
+  /** Next-touch migration policy is supported. */
+  unsigned char nexttouch_membind;
+
+  /** Migration flags are supported. */
+  unsigned char migrate_membind;
+};
+
+/** \brief Set of flags describing actual support for this topology.
+ *
+ * This is retrieved with hwloc_topology_get_support() and will be valid until
+ * the topology object is destroyed. Note: the values are correct only after
+ * discovery.
+ */
+struct hwloc_topology_support {
+ struct hwloc_topology_discovery_support *discovery; /**< Discovery support flags. */
+ struct hwloc_topology_cpubind_support *cpubind; /**< CPU binding support flags. */
+ struct hwloc_topology_membind_support *membind; /**< Memory binding support flags. */
+};
+
+/** \brief Retrieve the topology support. */
+HWLOC_DECLSPEC const struct hwloc_topology_support *hwloc_topology_get_support(hwloc_topology_t __hwloc_restrict topology);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_levels Object levels, depths and types
+ * @{
+ *
+ * Be sure to see the figure in \ref termsanddefs that shows a
+ * complete topology tree, including depths, child/sibling/cousin
+ * relationships, and an example of an asymmetric topology where one
+ * socket has fewer caches than its peers.
+ */
+
+/** \brief Get the depth of the hierarchical tree of objects.
+ *
+ * This is the depth of HWLOC_OBJ_PU objects plus one.
+ */
+HWLOC_DECLSPEC unsigned hwloc_topology_get_depth(hwloc_topology_t __hwloc_restrict topology) __hwloc_attribute_pure;
+
+/** \brief Returns the depth of objects of type \p type.
+ *
+ * If no object of this type is present on the underlying architecture, or if
+ * the OS doesn't provide this kind of information, the function returns
+ * HWLOC_TYPE_DEPTH_UNKNOWN.
+ *
+ * If type is absent but a similar type is acceptable, see also
+ * hwloc_get_type_or_below_depth() and hwloc_get_type_or_above_depth().
+ *
+ * If some objects of the given type exist in different levels,
+ * for instance L1 and L2 caches, or L1i and L1d caches,
+ * the function returns HWLOC_TYPE_DEPTH_MULTIPLE.
+ * See hwloc_get_cache_type_depth() in hwloc/helper.h to better handle this
+ * case.
+ *
+ * If an I/O object type is given, the function returns a virtual value
+ * because I/O objects are stored in special levels that are not CPU-related.
+ * This virtual depth may be passed to other hwloc functions such as
+ * hwloc_get_obj_by_depth() but it should not be considered as an actual
+ * depth by the application. In particular, it should not be compared with
+ * any other object depth or with the entire topology depth.
+ */
+HWLOC_DECLSPEC int hwloc_get_type_depth (hwloc_topology_t topology, hwloc_obj_type_t type);
+
+enum hwloc_get_type_depth_e {
+ HWLOC_TYPE_DEPTH_UNKNOWN = -1, /**< \brief No object of given type exists in the topology. \hideinitializer */
+ HWLOC_TYPE_DEPTH_MULTIPLE = -2, /**< \brief Objects of given type exist at different depths in the topology. \hideinitializer */
+ HWLOC_TYPE_DEPTH_BRIDGE = -3, /**< \brief Virtual depth for bridge object level. \hideinitializer */
+ HWLOC_TYPE_DEPTH_PCI_DEVICE = -4, /**< \brief Virtual depth for PCI device object level. \hideinitializer */
+ HWLOC_TYPE_DEPTH_OS_DEVICE = -5 /**< \brief Virtual depth for software device object level. \hideinitializer */
+};
+
+/** \brief Returns the depth of objects of type \p type or below
+ *
+ * If no object of this type is present on the underlying architecture, the
+ * function returns the depth of the first "present" object typically found
+ * inside \p type.
+ *
+ * If some objects of the given type exist in different levels, for instance
+ * L1 and L2 caches, the function returns HWLOC_TYPE_DEPTH_MULTIPLE.
+ */
+static __hwloc_inline int
+hwloc_get_type_or_below_depth (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure;
+
+/** \brief Returns the depth of objects of type \p type or above
+ *
+ * If no object of this type is present on the underlying architecture, the
+ * function returns the depth of the first "present" object typically
+ * containing \p type.
+ *
+ * If some objects of the given type exist in different levels, for instance
+ * L1 and L2 caches, the function returns HWLOC_TYPE_DEPTH_MULTIPLE.
+ */
+static __hwloc_inline int
+hwloc_get_type_or_above_depth (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure;
+
+/** \brief Returns the type of objects at depth \p depth.
+ *
+ * \return -1 if depth \p depth does not exist.
+ */
+HWLOC_DECLSPEC hwloc_obj_type_t hwloc_get_depth_type (hwloc_topology_t topology, unsigned depth) __hwloc_attribute_pure;
+
+/** \brief Returns the width of level at depth \p depth.
+ */
+HWLOC_DECLSPEC unsigned hwloc_get_nbobjs_by_depth (hwloc_topology_t topology, unsigned depth) __hwloc_attribute_pure;
+
+/** \brief Returns the width of level type \p type
+ *
+ * If no object for that type exists, 0 is returned.
+ * If there are several levels with objects of that type, -1 is returned.
+ */
+static __hwloc_inline int
+hwloc_get_nbobjs_by_type (hwloc_topology_t topology, hwloc_obj_type_t type) __hwloc_attribute_pure;
+
+/** \brief Returns the top-object of the topology-tree.
+ *
+ * Its type is typically ::HWLOC_OBJ_MACHINE but it could be different
+ * for complex topologies.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_root_obj (hwloc_topology_t topology) __hwloc_attribute_pure;
+
+/** \brief Returns the topology object at logical index \p idx from depth \p depth */
+HWLOC_DECLSPEC hwloc_obj_t hwloc_get_obj_by_depth (hwloc_topology_t topology, unsigned depth, unsigned idx) __hwloc_attribute_pure;
+
+/** \brief Returns the topology object at logical index \p idx with type \p type
+ *
+ * If no object for that type exists, \c NULL is returned.
+ * If there are several levels with objects of that type, \c NULL is returned
+ * and the caller may fall back to hwloc_get_obj_by_depth().
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type, unsigned idx) __hwloc_attribute_pure;
+
+/** \brief Returns the next object at depth \p depth.
+ *
+ * If \p prev is \c NULL, return the first object at depth \p depth.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_by_depth (hwloc_topology_t topology, unsigned depth, hwloc_obj_t prev);
+
+/** \brief Returns the next object of type \p type.
+ *
+ * If \p prev is \c NULL, return the first object of type \p type. If
+ * there are multiple or no depth for given type, return \c NULL and
+ * let the caller fallback to hwloc_get_next_obj_by_depth().
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type,
+ hwloc_obj_t prev);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_object_strings Manipulating Object Type, Sets and Attributes as Strings
+ * @{
+ */
+
+/** \brief Return a stringified topology object type */
+HWLOC_DECLSPEC const char * hwloc_obj_type_string (hwloc_obj_type_t type) __hwloc_attribute_const;
+
+/** \brief Return an object type from the string
+ *
+ * \return -1 if unrecognized.
+ */
+HWLOC_DECLSPEC hwloc_obj_type_t hwloc_obj_type_of_string (const char * string) __hwloc_attribute_pure;
+
+/** \brief Stringify the type of a given topology object into a human-readable form.
+ *
+ * It differs from hwloc_obj_type_string() because it prints type attributes such
+ * as cache depth and type.
+ *
+ * If \p size is 0, \p string may safely be \c NULL.
+ *
+ * \return the number of characters that were actually written if not truncating,
+ * or that would have been written (not including the ending \\0).
+ */
+HWLOC_DECLSPEC int hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t obj,
+ int verbose);
+
+/** \brief Stringify the attributes of a given topology object into a human-readable form.
+ *
+ * Attribute values are separated by \p separator.
+ *
+ * Only the major attributes are printed in non-verbose mode.
+ *
+ * If \p size is 0, \p string may safely be \c NULL.
+ *
+ * \return the number of characters that were actually written if not truncating,
+ * or that would have been written (not including the ending \\0).
+ */
+HWLOC_DECLSPEC int hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t obj, const char * __hwloc_restrict separator,
+ int verbose);
+
+/** \brief Stringify the cpuset containing a set of objects.
+ *
+ * If \p size is 0, \p string may safely be \c NULL.
+ *
+ * \return the number of characters that were actually written if not truncating,
+ * or that would have been written (not including the ending \\0).
+ */
+HWLOC_DECLSPEC int hwloc_obj_cpuset_snprintf(char * __hwloc_restrict str, size_t size, size_t nobj, const hwloc_obj_t * __hwloc_restrict objs);
+
+/** \brief Search the given key name in object infos and return the corresponding value.
+ *
+ * If multiple keys match the given name, only the first one is returned.
+ *
+ * \return \c NULL if no such key exists.
+ */
+static __hwloc_inline const char *
+hwloc_obj_get_info_by_name(hwloc_obj_t obj, const char *name) __hwloc_attribute_pure;
+
+/** \brief Add the given info name and value pair to the given object.
+ *
+ * The info is appended to the existing info array even if another key
+ * with the same name already exists.
+ *
+ * The input strings are copied before being added in the object infos.
+ *
+ * \note This function may be used to enforce object colors in the lstopo
+ * graphical output by using "lstopoStyle" as a name and "Background=#rrggbb"
+ * as a value. See CUSTOM COLORS in the lstopo(1) manpage for details.
+ *
+ * \note If \p value contains some non-printable characters, they will
+ * be dropped when exporting to XML, see hwloc_topology_export_xml().
+ */
+HWLOC_DECLSPEC void hwloc_obj_add_info(hwloc_obj_t obj, const char *name, const char *value);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_cpubinding CPU binding
+ *
+ * It is often useful to call hwloc_bitmap_singlify() first so that a single CPU
+ * remains in the set. This way, the process will not even migrate between
+ * different CPUs. Some operating systems also only support that kind of binding.
+ *
+ * \note Some operating systems do not provide all hwloc-supported
+ * mechanisms to bind processes, threads, etc. and the corresponding
+ * binding functions may fail. -1 is returned and errno is set to
+ * ENOSYS when it is not possible to bind the requested kind of object
+ * processes/threads. errno is set to EXDEV when the requested cpuset
+ * can not be enforced (e.g. some systems only allow one CPU, and some
+ * other systems only allow one NUMA node).
+ *
+ * The most portable version that should be preferred over the others, whenever
+ * possible, is
+ *
+ * \code
+ * hwloc_set_cpubind(topology, set, 0),
+ * \endcode
+ *
+ * as it just binds the current program, assuming it is single-threaded, or
+ *
+ * \code
+ * hwloc_set_cpubind(topology, set, HWLOC_CPUBIND_THREAD),
+ * \endcode
+ *
+ * which binds the current thread of the current program (which may be
+ * multithreaded).
+ *
+ * \note To unbind, just call the binding function with either a full cpuset or
+ * a cpuset equal to the system cpuset.
+ *
+ * \note On some operating systems, CPU binding may have effects on memory binding, see
+ * ::HWLOC_CPUBIND_NOMEMBIND
+ *
+ * Running lstopo --top can be a very convenient tool to check how binding
+ * actually happened.
+ * @{
+ */
+
+/** \brief Process/Thread binding flags.
+ *
+ * These bit flags can be used to refine the binding policy.
+ *
+ * The default (0) is to bind the current process, assumed to be
+ * single-threaded, in a non-strict way. This is the most portable
+ * way to bind as all operating systems usually provide it.
+ *
+ * \note Not all systems support all kinds of binding. See the
+ * "Detailed Description" section of \ref hwlocality_cpubinding for a
+ * description of errors that can occur.
+ */
+typedef enum {
+ /** \brief Bind all threads of the current (possibly) multithreaded process.
+ * \hideinitializer */
+ HWLOC_CPUBIND_PROCESS = (1<<0),
+
+ /** \brief Bind current thread of current process.
+ * \hideinitializer */
+ HWLOC_CPUBIND_THREAD = (1<<1),
+
+ /** \brief Request for strict binding from the OS.
+ *
+ * By default, when the designated CPUs are all busy while other
+ * CPUs are idle, operating systems may execute the thread/process
+ * on those other CPUs instead of the designated CPUs, to let them
+ * progress anyway. Strict binding means that the thread/process
+ * will _never_ execute on other CPUs than the designated CPUs, even
+ * when those are busy with other tasks and other CPUs are idle.
+ *
+ * \note Depending on the operating system, strict binding may not
+ * be possible (e.g., the OS does not implement it) or not allowed
+ * (e.g., for administrative reasons), and the function will fail
+ * in that case.
+ *
+ * When retrieving the binding of a process, this flag checks
+ * whether all its threads actually have the same binding. If the
+ * flag is not given, the binding of each thread will be
+ * accumulated.
+ *
+ * \note This flag is meaningless when retrieving the binding of a
+ * thread.
+ * \hideinitializer
+ */
+ HWLOC_CPUBIND_STRICT = (1<<2),
+
+ /** \brief Avoid any effect on memory binding
+ *
+ * On some operating systems, some CPU binding function would also
+ * bind the memory on the corresponding NUMA node. It is often not
+ * a problem for the application, but if it is, setting this flag
+ * will make hwloc avoid using OS functions that would also bind
+ * memory. This will however reduce the support of CPU bindings,
+ * i.e. potentially return -1 with errno set to ENOSYS in some
+ * cases.
+ *
+ * This flag is only meaningful when used with functions that set
+ * the CPU binding. It is ignored when used with functions that get
+ * CPU binding information.
+ * \hideinitializer
+ */
+ HWLOC_CPUBIND_NOMEMBIND = (1<<3)
+} hwloc_cpubind_flags_t;
+
+/** \brief Bind current process or thread on cpus given in physical bitmap \p set.
+ *
+ * \return -1 with errno set to ENOSYS if the action is not supported
+ * \return -1 with errno set to EXDEV if the binding cannot be enforced
+ */
+HWLOC_DECLSPEC int hwloc_set_cpubind(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags);
+
+/** \brief Get current process or thread binding.
+ *
+ * Writes into \p set the physical cpuset which the process or thread (according to \e
+ * flags) was last bound to.
+ */
+HWLOC_DECLSPEC int hwloc_get_cpubind(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
+
+/** \brief Bind a process \p pid on cpus given in physical bitmap \p set.
+ *
+ * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
+ *
+ * \note As a special case on Linux, if a tid (thread ID) is supplied
+ * instead of a pid (process ID) and HWLOC_CPUBIND_THREAD is passed in flags,
+ * the binding is applied to that specific thread.
+ *
+ * \note On non-Linux systems, HWLOC_CPUBIND_THREAD can not be used in \p flags.
+ */
+HWLOC_DECLSPEC int hwloc_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_cpuset_t set, int flags);
+
+/** \brief Get the current physical binding of process \p pid.
+ *
+ * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
+ *
+ * \note As a special case on Linux, if a tid (thread ID) is supplied
+ * instead of a pid (process ID) and HWLOC_CPUBIND_THREAD is passed in flags,
+ * the binding for that specific thread is returned.
+ *
+ * \note On non-Linux systems, HWLOC_CPUBIND_THREAD can not be used in \p flags.
+ */
+HWLOC_DECLSPEC int hwloc_get_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags);
+
+#ifdef hwloc_thread_t
+/** \brief Bind a thread \p thread on cpus given in physical bitmap \p set.
+ *
+ * \note \p hwloc_thread_t is \p pthread_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
+ *
+ * \note HWLOC_CPUBIND_PROCESS can not be used in \p flags.
+ */
+HWLOC_DECLSPEC int hwloc_set_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t thread, hwloc_const_cpuset_t set, int flags);
+#endif
+
+#ifdef hwloc_thread_t
+/** \brief Get the current physical binding of thread \p tid.
+ *
+ * \note \p hwloc_thread_t is \p pthread_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
+ *
+ * \note HWLOC_CPUBIND_PROCESS can not be used in \p flags.
+ */
+HWLOC_DECLSPEC int hwloc_get_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t thread, hwloc_cpuset_t set, int flags);
+#endif
+
+/** \brief Get the last physical CPU where the current process or thread ran.
+ *
+ * The operating system may move some tasks from one processor
+ * to another at any time according to their binding,
+ * so this function may return something that is already
+ * outdated.
+ *
+ * \p flags can include either HWLOC_CPUBIND_PROCESS or HWLOC_CPUBIND_THREAD to
+ * specify whether the query should be for the whole process (union of all CPUs
+ * on which all threads are running), or only the current thread. If the
+ * process is single-threaded, flags can be set to zero to let hwloc use
+ * whichever method is available on the underlying OS.
+ */
+HWLOC_DECLSPEC int hwloc_get_last_cpu_location(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
+
+/** \brief Get the last physical CPU where a process ran.
+ *
+ * The operating system may move some tasks from one processor
+ * to another at any time according to their binding,
+ * so this function may return something that is already
+ * outdated.
+ *
+ * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
+ *
+ * \note As a special case on Linux, if a tid (thread ID) is supplied
+ * instead of a pid (process ID) and HWLOC_CPUBIND_THREAD is passed in flags,
+ * the last CPU location of that specific thread is returned.
+ *
+ * \note On non-Linux systems, HWLOC_CPUBIND_THREAD can not be used in \p flags.
+ */
+HWLOC_DECLSPEC int hwloc_get_proc_last_cpu_location(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_membinding Memory binding
+ *
+ * Memory binding can be done three ways:
+ *
+ * - explicit memory allocation thanks to hwloc_alloc_membind and friends: the
+ * binding will have effect on the memory allocated by these functions.
+ * - implicit memory binding through binding policy: hwloc_set_membind and
+ * friends only define the current policy of the process, which will be
+ * applied to the subsequent calls to malloc() and friends.
+ * - migration of existing memory ranges, thanks to hwloc_set_area_membind()
+ * and friends, which move already-allocated data.
+ *
+ * \note Not all operating systems support all three ways. Using a binding flag
+ * or policy that is not supported by the underlying OS will cause hwloc's
+ * binding functions to fail and return -1. errno will be set to
+ * ENOSYS when the system does not support the specified action or policy
+ * (e.g., some systems only allow binding memory on a per-thread
+ * basis, whereas other systems only allow binding memory for all
+ * threads in a process). errno will be set to EXDEV when the
+ * requested cpuset can not be enforced (e.g., some systems only allow
+ * binding memory to a single NUMA node).
+ *
+ * The most portable form that should be preferred over the others
+ * whenever possible is as follows:
+ *
+ * \code
+ * hwloc_alloc_membind_policy(topology, size, set,
+ * HWLOC_MEMBIND_DEFAULT, 0);
+ * \endcode
+ *
+ * This will allocate some memory hopefully bound to the specified set.
+ * To do so, hwloc will possibly have to change the current memory
+ * binding policy in order to actually get the memory bound, if the OS
+ * does not provide any other way to simply allocate bound memory
+ * without changing the policy for all allocations. That is the
+ * difference with hwloc_alloc_membind(), which will never change the
+ * current memory binding policy. Note that since HWLOC_MEMBIND_STRICT
+ * was not specified, failures to bind will not be reported --
+ * generally, only memory allocation failures will be reported (e.g.,
+ * even a plain malloc() would have failed with ENOMEM).
+ *
+ * Each hwloc memory binding function is available in two forms: one
+ * that takes a CPU set argument and another that takes a NUMA memory
+ * node set argument (see \ref hwlocality_object_sets and \ref
+ * hwlocality_bitmap for a discussion of CPU sets and NUMA memory node
+ * sets). The names of the latter form end with _nodeset. It is also
+ * possible to convert between CPU set and node set using
+ * hwloc_cpuset_to_nodeset() or hwloc_cpuset_from_nodeset().
+ *
+ * \note On some operating systems, memory binding affects the CPU
+ * binding; see ::HWLOC_MEMBIND_NOCPUBIND
+ * @{
+ */
+
+/** \brief Memory binding policy.
+ *
+ * These constants can be used to choose the binding policy. Only one policy can
+ * be used at a time (i.e., the values cannot be OR'ed together).
+ *
+ * \note Not all systems support all kinds of binding. See the
+ * "Detailed Description" section of \ref hwlocality_membinding for a
+ * description of errors that can occur.
+ */
+typedef enum {
+ /** \brief Reset the memory allocation policy to the system default.
+ * \hideinitializer */
+ HWLOC_MEMBIND_DEFAULT = 0,
+
+ /** \brief Allocate memory
+ * but do not immediately bind it to a specific locality. Instead,
+ * each page in the allocation is bound only when it is first
+ * touched. Pages are individually bound to the local NUMA node of
+ * the first thread that touches them. If there is not enough memory
+ * on the node, allocation may be done in the specified cpuset
+ * before allocating on other nodes.
+ * \hideinitializer */
+ HWLOC_MEMBIND_FIRSTTOUCH = 1,
+
+ /** \brief Allocate memory on the specified nodes.
+ * \hideinitializer */
+ HWLOC_MEMBIND_BIND = 2,
+
+ /** \brief Allocate memory on the given nodes in an interleaved
+ * / round-robin manner. The precise layout of the memory across
+ * multiple NUMA nodes is OS/system specific. Interleaving can be
+ * useful when threads distributed across the specified NUMA nodes
+ * will all be accessing the whole memory range concurrently, since
+ * the interleave will then balance the memory references.
+ * \hideinitializer */
+ HWLOC_MEMBIND_INTERLEAVE = 3,
+
+ /** \brief Replicate memory on the given nodes; reads from this
+ * memory will attempt to be serviced from the NUMA node local to
+ * the reading thread. Replicating can be useful when multiple
+ * threads from the specified NUMA nodes will be sharing the same
+ * read-only data.
+ *
+ * This policy can only be used with existing memory allocations
+ * (i.e., the hwloc_set_*membind*() functions); it cannot be used
+ * with functions that allocate new memory (i.e., the hwloc_alloc*()
+ * functions).
+ * \hideinitializer */
+ HWLOC_MEMBIND_REPLICATE = 4,
+
+ /** \brief For each page bound with this policy, by next time
+ * it is touched (and next time only), it is moved from its current
+ * location to the local NUMA node of the thread where the memory
+ * reference occurred (if it needs to be moved at all).
+ * \hideinitializer */
+ HWLOC_MEMBIND_NEXTTOUCH = 5,
+
+ /** \brief Returned by hwloc_get_membind*() functions when multiple
+ * threads or parts of a memory area have differing memory binding
+ * policies.
+ * \hideinitializer */
+ HWLOC_MEMBIND_MIXED = -1
+} hwloc_membind_policy_t;
+
+/** \brief Memory binding flags.
+ *
+ * These flags can be used to refine the binding policy. All flags
+ * can be logically OR'ed together with the exception of
+ * HWLOC_MEMBIND_PROCESS and HWLOC_MEMBIND_THREAD; these two flags are
+ * mutually exclusive.
+ *
+ * \note Not all systems support all kinds of binding. See the
+ * "Detailed Description" section of \ref hwlocality_membinding for a
+ * description of errors that can occur.
+ */
+typedef enum {
+ /** \brief Set policy for all threads of the specified (possibly
+ * multithreaded) process. This flag is mutually exclusive with
+ * HWLOC_MEMBIND_THREAD.
+ * \hideinitializer */
+ HWLOC_MEMBIND_PROCESS = (1<<0),
+
+ /** \brief Set policy for a specific thread of the current process.
+ * This flag is mutually exclusive with HWLOC_MEMBIND_PROCESS.
+ * \hideinitializer */
+ HWLOC_MEMBIND_THREAD = (1<<1),
+
+ /** \brief Request strict binding from the OS. The function will fail if
+ * the binding can not be guaranteed / completely enforced.
+ *
+ * This flag has slightly different meanings depending on which
+ * function it is used with.
+ * \hideinitializer */
+ HWLOC_MEMBIND_STRICT = (1<<2),
+
+ /** \brief Migrate existing allocated memory. If the memory cannot
+ * be migrated and the HWLOC_MEMBIND_STRICT flag is passed, an error
+ * will be returned.
+ * \hideinitializer */
+ HWLOC_MEMBIND_MIGRATE = (1<<3),
+
+ /** \brief Avoid any effect on CPU binding.
+ *
+ * On some operating systems, some underlying memory binding
+ * functions also bind the application to the corresponding CPU(s).
+ * Using this flag will cause hwloc to avoid using OS functions that
+ * could potentially affect CPU bindings. Note, however, that using
+ * NOCPUBIND may reduce hwloc's overall memory binding
+ * support. Specifically: some of hwloc's memory binding functions
+ * may fail with errno set to ENOSYS when used with NOCPUBIND.
+ * \hideinitializer
+ */
+ HWLOC_MEMBIND_NOCPUBIND = (1<<4)
+} hwloc_membind_flags_t;
+
+/** \brief Set the default memory binding policy of the current
+ * process or thread to prefer the NUMA node(s) specified by physical \p nodeset
+ *
+ * If neither HWLOC_MEMBIND_PROCESS nor HWLOC_MEMBIND_THREAD is
+ * specified, the current process is assumed to be single-threaded.
+ * This is the most portable form as it permits hwloc to use either
+ * process-based OS functions or thread-based OS functions, depending
+ * on which are available.
+ *
+ * \return -1 with errno set to ENOSYS if the action is not supported
+ * \return -1 with errno set to EXDEV if the binding cannot be enforced
+ */
+HWLOC_DECLSPEC int hwloc_set_membind_nodeset(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);
+
+/** \brief Set the default memory binding policy of the current
+ * process or thread to prefer the NUMA node(s) near the specified physical \p
+ * cpuset
+ *
+ * If neither HWLOC_MEMBIND_PROCESS nor HWLOC_MEMBIND_THREAD is
+ * specified, the current process is assumed to be single-threaded.
+ * This is the most portable form as it permits hwloc to use either
+ * process-based OS functions or thread-based OS functions, depending
+ * on which are available.
+ *
+ * \return -1 with errno set to ENOSYS if the action is not supported
+ * \return -1 with errno set to EXDEV if the binding cannot be enforced
+ */
+HWLOC_DECLSPEC int hwloc_set_membind(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset, hwloc_membind_policy_t policy, int flags);
+
+/** \brief Query the default memory binding policy and physical locality of the
+ * current process or thread.
+ *
+ * This function has two output parameters: \p nodeset and \p policy.
+ * The values returned in these parameters depend on both the \p flags
+ * passed in and the current memory binding policies and nodesets in
+ * the queried target.
+ *
+ * Passing the HWLOC_MEMBIND_PROCESS flag specifies that the query
+ * target is the current policies and nodesets for all the threads in
+ * the current process. Passing HWLOC_MEMBIND_THREAD specifies that
+ * the query target is the current policy and nodeset for only the
+ * thread invoking this function.
+ *
+ * If neither of these flags are passed (which is the most portable
+ * method), the process is assumed to be single threaded. This allows
+ * hwloc to use either process-based OS functions or thread-based OS
+ * functions, depending on which are available.
+ *
+ * HWLOC_MEMBIND_STRICT is only meaningful when HWLOC_MEMBIND_PROCESS
+ * is also specified. In this case, hwloc will check the default
+ * memory policies and nodesets for all threads in the process. If
+ * they are not identical, -1 is returned and errno is set to EXDEV.
+ * If they are identical, the values are returned in \p nodeset and \p
+ * policy.
+ *
+ * Otherwise, if HWLOC_MEMBIND_PROCESS is specified (and
+ * HWLOC_MEMBIND_STRICT is \em not specified), \p nodeset is set to
+ * the logical OR of all threads' default nodeset. If all threads'
+ * default policies are the same, \p policy is set to that policy. If
+ * they are different, \p policy is set to HWLOC_MEMBIND_MIXED.
+ *
+ * In the HWLOC_MEMBIND_THREAD case (or when neither
+ * HWLOC_MEMBIND_PROCESS nor HWLOC_MEMBIND_THREAD is specified), there
+ * is only one nodeset and policy; they are returned in \p nodeset and
+ * \p policy, respectively.
+ *
+ * If any other flags are specified, -1 is returned and errno is set
+ * to EINVAL.
+ */
+HWLOC_DECLSPEC int hwloc_get_membind_nodeset(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags);
+
+/** \brief Query the default memory binding policy and physical locality of the
+ * current process or thread (the locality is returned in \p cpuset as
+ * CPUs near the locality's actual NUMA node(s)).
+ *
+ * This function has two output parameters: \p cpuset and \p policy.
+ * The values returned in these parameters depend on both the \p flags
+ * passed in and the current memory binding policies and nodesets in
+ * the queried target.
+ *
+ * Passing the HWLOC_MEMBIND_PROCESS flag specifies that the query
+ * target is the current policies and nodesets for all the threads in
+ * the current process. Passing HWLOC_MEMBIND_THREAD specifies that
+ * the query target is the current policy and nodeset for only the
+ * thread invoking this function.
+ *
+ * If neither of these flags are passed (which is the most portable
+ * method), the process is assumed to be single threaded. This allows
+ * hwloc to use either process-based OS functions or thread-based OS
+ * functions, depending on which are available.
+ *
+ * HWLOC_MEMBIND_STRICT is only meaningful when HWLOC_MEMBIND_PROCESS
+ * is also specified. In this case, hwloc will check the default
+ * memory policies and nodesets for all threads in the process. If
+ * they are not identical, -1 is returned and errno is set to EXDEV.
+ * If they are identical, the policy is returned in \p policy. \p
+ * cpuset is set to the union of CPUs near the NUMA node(s) in the
+ * nodeset.
+ *
+ * Otherwise, if HWLOC_MEMBIND_PROCESS is specified (and
+ * HWLOC_MEMBIND_STRICT is \em not specified), the default nodeset
+ * from each thread is logically OR'ed together. \p cpuset is set to
+ * the union of CPUs near the NUMA node(s) in the resulting nodeset.
+ * If all threads' default policies are the same, \p policy is set to
+ * that policy. If they are different, \p policy is set to
+ * HWLOC_MEMBIND_MIXED.
+ *
+ * In the HWLOC_MEMBIND_THREAD case (or when neither
+ * HWLOC_MEMBIND_PROCESS nor HWLOC_MEMBIND_THREAD is specified), there
+ * is only one nodeset and policy. The policy is returned in \p
+ * policy; \p cpuset is set to the union of CPUs near the NUMA node(s)
+ * in the \p nodeset.
+ *
+ * If any other flags are specified, -1 is returned and errno is set
+ * to EINVAL.
+ */
+HWLOC_DECLSPEC int hwloc_get_membind(hwloc_topology_t topology, hwloc_cpuset_t cpuset, hwloc_membind_policy_t * policy, int flags);
+
+/** \brief Set the default memory binding policy of the specified
+ * process to prefer the NUMA node(s) specified by physical \p nodeset
+ *
+ * \return -1 with errno set to ENOSYS if the action is not supported
+ * \return -1 with errno set to EXDEV if the binding cannot be enforced
+ *
+ * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
+ */
+HWLOC_DECLSPEC int hwloc_set_proc_membind_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);
+
+/** \brief Set the default memory binding policy of the specified
+ * process to prefer the NUMA node(s) near the specified physical \p cpuset
+ *
+ * \return -1 with errno set to ENOSYS if the action is not supported
+ * \return -1 with errno set to EXDEV if the binding cannot be enforced
+ *
+ * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
+ */
+HWLOC_DECLSPEC int hwloc_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_cpuset_t cpuset, hwloc_membind_policy_t policy, int flags);
+
+/** \brief Query the default memory binding policy and physical locality of the
+ * specified process.
+ *
+ * This function has two output parameters: \p nodeset and \p policy.
+ * The values returned in these parameters depend on both the \p flags
+ * passed in and the current memory binding policies and nodesets in
+ * the queried target.
+ *
+ * Passing the HWLOC_MEMBIND_PROCESS flag specifies that the query
+ * target is the current policies and nodesets for all the threads in
+ * the specified process. If HWLOC_MEMBIND_PROCESS is not specified
+ * (which is the most portable method), the process is assumed to be
+ * single threaded. This allows hwloc to use either process-based OS
+ * functions or thread-based OS functions, depending on which are
+ * available.
+ *
+ * Note that it does not make sense to pass HWLOC_MEMBIND_THREAD to
+ * this function.
+ *
+ * If HWLOC_MEMBIND_STRICT is specified, hwloc will check the default
+ * memory policies and nodesets for all threads in the specified
+ * process. If they are not identical, -1 is returned and errno is
+ * set to EXDEV. If they are identical, the values are returned in \p
+ * nodeset and \p policy.
+ *
+ * Otherwise, \p nodeset is set to the logical OR of all threads'
+ * default nodeset. If all threads' default policies are the same, \p
+ * policy is set to that policy. If they are different, \p policy is
+ * set to HWLOC_MEMBIND_MIXED.
+ *
+ * If any other flags are specified, -1 is returned and errno is set
+ * to EINVAL.
+ *
+ * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
+ */
+HWLOC_DECLSPEC int hwloc_get_proc_membind_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags);
+
+/** \brief Query the default memory binding policy and physical locality of the
+ * specified process (the locality is returned in \p cpuset as CPUs
+ * near the locality's actual NUMA node(s)).
+ *
+ * This function has two output parameters: \p cpuset and \p policy.
+ * The values returned in these parameters depend on both the \p flags
+ * passed in and the current memory binding policies and nodesets in
+ * the queried target.
+ *
+ * Passing the HWLOC_MEMBIND_PROCESS flag specifies that the query
+ * target is the current policies and nodesets for all the threads in
+ * the specified process. If HWLOC_MEMBIND_PROCESS is not specified
+ * (which is the most portable method), the process is assumed to be
+ * single threaded. This allows hwloc to use either process-based OS
+ * functions or thread-based OS functions, depending on which are
+ * available.
+ *
+ * Note that it does not make sense to pass HWLOC_MEMBIND_THREAD to
+ * this function.
+ *
+ * If HWLOC_MEMBIND_STRICT is specified, hwloc will check the default
+ * memory policies and nodesets for all threads in the specified
+ * process. If they are not identical, -1 is returned and errno is
+ * set to EXDEV. If they are identical, the policy is returned in \p
+ * policy. \p cpuset is set to the union of CPUs near the NUMA
+ * node(s) in the nodeset.
+ *
+ * Otherwise, the default nodeset from each thread is logically OR'ed
+ * together. \p cpuset is set to the union of CPUs near the NUMA
+ * node(s) in the resulting nodeset. If all threads' default policies
+ * are the same, \p policy is set to that policy. If they are
+ * different, \p policy is set to HWLOC_MEMBIND_MIXED.
+ *
+ * If any other flags are specified, -1 is returned and errno is set
+ * to EINVAL.
+ *
+ * \note \p hwloc_pid_t is \p pid_t on Unix platforms,
+ * and \p HANDLE on native Windows platforms.
+ */
+HWLOC_DECLSPEC int hwloc_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t cpuset, hwloc_membind_policy_t * policy, int flags);
+
+/** \brief Bind the already-allocated memory identified by (addr, len)
+ * to the NUMA node(s) in physical \p nodeset.
+ *
+ * \return -1 with errno set to ENOSYS if the action is not supported
+ * \return -1 with errno set to EXDEV if the binding cannot be enforced
+ */
+HWLOC_DECLSPEC int hwloc_set_area_membind_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);
+
+/** \brief Bind the already-allocated memory identified by (addr, len)
+ * to the NUMA node(s) near physical \p cpuset.
+ *
+ * \return -1 with errno set to ENOSYS if the action is not supported
+ * \return -1 with errno set to EXDEV if the binding cannot be enforced
+ */
+HWLOC_DECLSPEC int hwloc_set_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_cpuset_t cpuset, hwloc_membind_policy_t policy, int flags);
+
+/** \brief Query the physical NUMA node(s) and binding policy of the memory
+ * identified by (\p addr, \p len ).
+ *
+ * This function has two output parameters: \p nodeset and \p policy.
+ * The values returned in these parameters depend on both the \p flags
+ * passed in and the memory binding policies and nodesets of the pages
+ * in the address range.
+ *
+ * If HWLOC_MEMBIND_STRICT is specified, the target pages are first
+ * checked to see if they all have the same memory binding policy and
+ * nodeset. If they do not, -1 is returned and errno is set to EXDEV.
+ * If they are identical across all pages, the nodeset and policy are
+ * returned in \p nodeset and \p policy, respectively.
+ *
+ * If HWLOC_MEMBIND_STRICT is not specified, \p nodeset is set to the
+ * union of all NUMA node(s) containing pages in the address range.
+ * If all pages in the target have the same policy, it is returned in
+ * \p policy. Otherwise, \p policy is set to HWLOC_MEMBIND_MIXED.
+ *
+ * If any other flags are specified, -1 is returned and errno is set
+ * to EINVAL.
+ */
+HWLOC_DECLSPEC int hwloc_get_area_membind_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags);
+
+/** \brief Query the CPUs near the physical NUMA node(s) and binding policy of
+ * the memory identified by (\p addr, \p len ).
+ *
+ * This function has two output parameters: \p cpuset and \p policy.
+ * The values returned in these parameters depend on both the \p flags
+ * passed in and the memory binding policies and nodesets of the pages
+ * in the address range.
+ *
+ * If HWLOC_MEMBIND_STRICT is specified, the target pages are first
+ * checked to see if they all have the same memory binding policy and
+ * nodeset. If they do not, -1 is returned and errno is set to EXDEV.
+ * If they are identical across all pages, the policy is returned in
+ * \p policy. \p cpuset is set to the union of CPUs near the NUMA
+ * node(s) in the nodeset.
+ *
+ * If HWLOC_MEMBIND_STRICT is not specified, the union of all NUMA
+ * node(s) containing pages in the address range is calculated. \p
+ * cpuset is then set to the CPUs near the NUMA node(s) in this union.
+ * If all pages in the target have the same policy, it is returned in
+ * \p policy. Otherwise, \p policy is set to HWLOC_MEMBIND_MIXED.
+ *
+ * If any other flags are specified, -1 is returned and errno is set
+ * to EINVAL.
+ */
+HWLOC_DECLSPEC int hwloc_get_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_cpuset_t cpuset, hwloc_membind_policy_t * policy, int flags);
+
+/** \brief Allocate some memory
+ *
+ * This is equivalent to malloc(), except that it tries to allocate
+ * page-aligned memory from the OS.
+ *
+ * \note The allocated memory should be freed with hwloc_free().
+ */
+HWLOC_DECLSPEC void *hwloc_alloc(hwloc_topology_t topology, size_t len);
+
+/** \brief Allocate some memory on the given physical nodeset \p nodeset
+ *
+ * \return NULL with errno set to ENOSYS if the action is not supported
+ * and HWLOC_MEMBIND_STRICT is given
+ * \return NULL with errno set to EXDEV if the binding cannot be enforced
+ * and HWLOC_MEMBIND_STRICT is given
+ *
+ * \note The allocated memory should be freed with hwloc_free().
+ */
+HWLOC_DECLSPEC void *hwloc_alloc_membind_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc;
+
+/** \brief Allocate some memory on memory nodes near the given physical cpuset \p cpuset
+ *
+ * \return NULL with errno set to ENOSYS if the action is not supported
+ * and HWLOC_MEMBIND_STRICT is given
+ * \return NULL with errno set to EXDEV if the binding cannot be enforced
+ * and HWLOC_MEMBIND_STRICT is given
+ *
+ * \note The allocated memory should be freed with hwloc_free().
+ */
+HWLOC_DECLSPEC void *hwloc_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_cpuset_t cpuset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc;
+
+/** \brief Allocate some memory on the given nodeset \p nodeset
+ *
+ * This is similar to hwloc_alloc_membind except that it is allowed to change
+ * the current memory binding policy, thus providing more binding support, at
+ * the expense of changing the current state.
+ */
+static __hwloc_inline void *
+hwloc_alloc_membind_policy_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc;
+
+/** \brief Allocate some memory on the memory nodes near given cpuset \p cpuset
+ *
+ * This is similar to hwloc_alloc_membind_policy_nodeset, but for a given cpuset.
+ */
+static __hwloc_inline void *
+hwloc_alloc_membind_policy(hwloc_topology_t topology, size_t len, hwloc_const_cpuset_t set, hwloc_membind_policy_t policy, int flags) __hwloc_attribute_malloc;
+
+/** \brief Free memory that was previously allocated by hwloc_alloc()
+ * or hwloc_alloc_membind().
+ */
+HWLOC_DECLSPEC int hwloc_free(hwloc_topology_t topology, void *addr, size_t len);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_tinker Modifying a loaded Topology
+ * @{
+ */
+
+/** \brief Add a MISC object to the topology
+ *
+ * A new MISC object will be created and inserted into the topology at the
+ * position given by bitmap \p cpuset. This offers a way to add new
+ * intermediate levels to the topology hierarchy.
+ *
+ * \p cpuset and \p name will be copied to setup the new object attributes.
+ *
+ * \return the newly-created object.
+ * \return \c NULL if the insertion conflicts with the existing topology tree.
+ *
+ * \note If \p name contains some non-printable characters, they will
+ * be dropped when exporting to XML, see hwloc_topology_export_xml().
+ */
+HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_misc_object_by_cpuset(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset, const char *name);
+
+/** \brief Add a MISC object as a leaf of the topology
+ *
+ * A new MISC object will be created and inserted into the topology at the
+ * position given by parent. It is appended to the list of existing children,
+ * without ever adding any intermediate hierarchy level. This is useful for
+ * annotating the topology without actually changing the hierarchy.
+ *
+ * \p name will be copied to set up the new object attributes.
+ * However, the new leaf object will not have any \p cpuset.
+ *
+ * \return the newly-created object
+ *
+ * \note If \p name contains some non-printable characters, they will
+ * be dropped when exporting to XML, see hwloc_topology_export_xml().
+ */
+HWLOC_DECLSPEC hwloc_obj_t hwloc_topology_insert_misc_object_by_parent(hwloc_topology_t topology, hwloc_obj_t parent, const char *name);
+
+/** \brief Flags to be given to hwloc_topology_restrict(). */
+enum hwloc_restrict_flags_e {
+ /** \brief Adapt distance matrices according to objects being removed during restriction.
+ * If this flag is not set, distance matrices are removed.
+ * \hideinitializer
+ */
+ HWLOC_RESTRICT_FLAG_ADAPT_DISTANCES = (1<<0),
+
+ /** \brief Move Misc objects to ancestors if their parents are removed during restriction.
+ * If this flag is not set, Misc objects are removed when their parents are removed.
+ * \hideinitializer
+ */
+ HWLOC_RESTRICT_FLAG_ADAPT_MISC = (1<<1),
+
+ /** \brief Move I/O objects to ancestors if their parents are removed during restriction.
+ * If this flag is not set, I/O devices and bridges are removed when their parents are removed.
+ * \hideinitializer
+ */
+ HWLOC_RESTRICT_FLAG_ADAPT_IO = (1<<2)
+};
+
+/** \brief Restrict the topology to the given CPU set.
+ *
+ * Topology \p topology is modified so as to remove all objects that
+ * are not included (or partially included) in the CPU set \p cpuset.
+ * All objects CPU and node sets are restricted accordingly.
+ *
+ * \p flags is an OR'ed set of ::hwloc_restrict_flags_e.
+ *
+ * \note This call may not be reverted by restricting back to a larger
+ * cpuset. Once dropped during restriction, objects may not be brought
+ * back, except by loading another topology with hwloc_topology_load().
+ *
+ * \return 0 on success.
+ *
+ * \return -1 with errno set to EINVAL if the input cpuset is invalid.
+ * The topology is not modified in this case.
+ *
+ * \return -1 with errno set to ENOMEM on failure to allocate internal data.
+ * The topology is reinitialized in this case. It should be either
+ * destroyed with hwloc_topology_destroy() or configured and loaded again.
+ */
+HWLOC_DECLSPEC int hwloc_topology_restrict(hwloc_topology_t __hwloc_restrict topology, hwloc_const_cpuset_t cpuset, unsigned long flags);
+
+/** \brief Duplicate a topology.
+ *
+ * The entire topology structure as well as its objects
+ * are duplicated into a new one.
+ *
+ * This is useful for keeping a backup while modifying a topology.
+ */
+HWLOC_DECLSPEC int hwloc_topology_dup(hwloc_topology_t *newtopology, hwloc_topology_t oldtopology);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_custom Building Custom Topologies
+ *
+ * A custom topology may be initialized by calling hwloc_topology_set_custom()
+ * after hwloc_topology_init(). It may then be modified by inserting objects
+ * or entire topologies. Once done assembling, hwloc_topology_load() should
+ * be invoked as usual to finalize the topology.
+ * @{
+ */
+
+/** \brief Insert an existing topology inside a custom topology
+ *
+ * Duplicate the existing topology \p oldtopology inside a new
+ * custom topology \p newtopology as a leaf of object \p newparent.
+ *
+ * If \p oldroot is not \c NULL, duplicate \p oldroot and all its
+ * children instead of the entire \p oldtopology. Passing the root
+ * object of \p oldtopology in \p oldroot is equivalent to passing
+ * \c NULL.
+ *
+ * The custom topology \p newtopology must have been prepared with
+ * hwloc_topology_set_custom() and not loaded with hwloc_topology_load()
+ * yet.
+ *
+ * \p newparent may be either the root of \p newtopology or an object
+ * that was added through hwloc_custom_insert_group_object_by_parent().
+ *
+ * \note The cpuset and nodeset of the \p newparent object are not
+ * modified based on the contents of \p oldtopology.
+ */
+HWLOC_DECLSPEC int hwloc_custom_insert_topology(hwloc_topology_t newtopology, hwloc_obj_t newparent, hwloc_topology_t oldtopology, hwloc_obj_t oldroot);
+
+/** \brief Insert a new group object inside a custom topology
+ *
+ * An object with type ::HWLOC_OBJ_GROUP is inserted as a new child
+ * of object \p parent.
+ *
+ * \p groupdepth is the depth attribute to be given to the new object.
+ * It may for instance be 0 for top-level groups, 1 for their children,
+ * and so on.
+ *
+ * The custom topology \p newtopology must have been prepared with
+ * hwloc_topology_set_custom() and not loaded with hwloc_topology_load()
+ * yet.
+ *
+ * \p parent may be either the root of \p topology or an object that
+ * was added earlier through hwloc_custom_insert_group_object_by_parent().
+ *
+ * \note The cpuset and nodeset of the new group object are NULL because
+ * these sets are meaningless when assembling multiple topologies.
+ *
+ * \note The cpuset and nodeset of the \p parent object are not modified.
+ */
+HWLOC_DECLSPEC hwloc_obj_t hwloc_custom_insert_group_object_by_parent(hwloc_topology_t topology, hwloc_obj_t parent, int groupdepth);
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_xmlexport Exporting Topologies to XML
+ * @{
+ */
+
+/** \brief Export the topology into an XML file.
+ *
+ * This file may be loaded later through hwloc_topology_set_xml().
+ *
+ * \return -1 if a failure occurred.
+ *
+ * \note See also hwloc_topology_set_userdata_export_callback()
+ * for exporting application-specific userdata.
+ *
+ * \note Only printable characters may be exported to XML string attributes.
+ * Any other character, especially any non-ASCII character, will be silently
+ * dropped.
+ */
+HWLOC_DECLSPEC int hwloc_topology_export_xml(hwloc_topology_t topology, const char *xmlpath);
+
+/** \brief Export the topology into a newly-allocated XML memory buffer.
+ *
+ * \p xmlbuffer is allocated by the callee and should be freed with
+ * hwloc_free_xmlbuffer() later in the caller.
+ *
+ * This memory buffer may be loaded later through hwloc_topology_set_xmlbuffer().
+ *
+ * \return -1 if a failure occurred.
+ *
+ * \note See also hwloc_topology_set_userdata_export_callback()
+ * for exporting application-specific userdata.
+ *
+ * \note Only printable characters may be exported to XML string attributes.
+ * Any other character, especially any non-ASCII character, will be silently
+ * dropped.
+ */
+HWLOC_DECLSPEC int hwloc_topology_export_xmlbuffer(hwloc_topology_t topology, char **xmlbuffer, int *buflen);
+
+/** \brief Free a buffer allocated by hwloc_topology_export_xmlbuffer() */
+HWLOC_DECLSPEC void hwloc_free_xmlbuffer(hwloc_topology_t topology, char *xmlbuffer);
+
+/** \brief Set the application-specific callback for exporting userdata
+ *
+ * The object userdata pointer is not exported to XML by default because hwloc
+ * does not know what it contains.
+ *
+ * This function lets applications set \p export_cb to a callback function
+ * that converts this opaque userdata into an exportable string.
+ *
+ * \p export_cb is invoked during XML export for each object whose
+ * \p userdata pointer is not \c NULL.
+ * The callback should use hwloc_export_obj_userdata() or
+ * hwloc_export_obj_userdata_base64() to actually export
+ * something to XML (possibly multiple times per object).
+ *
+ * \p export_cb may be set to \c NULL if userdata should not be exported to XML.
+ */
+HWLOC_DECLSPEC void hwloc_topology_set_userdata_export_callback(hwloc_topology_t topology,
+ void (*export_cb)(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj));
+
+/** \brief Export some object userdata to XML
+ *
+ * This function may only be called from within the export() callback passed
+ * to hwloc_topology_set_userdata_export_callback().
+ * It may be invoked one or multiple times to export some userdata to XML.
+ * The \p buffer content of length \p length is stored with optional name
+ * \p name.
+ *
+ * When importing this XML file, the import() callback (if set) will be
+ * called exactly as many times as hwloc_export_obj_userdata() was called
+ * during export(). It will receive the corresponding \p name, \p buffer
+ * and \p length arguments.
+ *
+ * \p reserved, \p topology and \p obj must be the first three parameters
+ * that were given to the export callback.
+ *
+ * Only printable characters may be exported to XML string attributes.
+ * If a non-printable character is passed in \p name or \p buffer,
+ * the function returns -1 with errno set to EINVAL.
+ *
+ * If exporting binary data, the application should first encode into
+ * printable characters only (or use hwloc_export_obj_userdata_base64()).
+ * It should also take care of portability issues if the export may
+ * be reimported on a different architecture.
+ */
+HWLOC_DECLSPEC int hwloc_export_obj_userdata(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length);
+
+/** \brief Encode and export some object userdata to XML
+ *
+ * This function is similar to hwloc_export_obj_userdata() but it encodes
+ * the input buffer into printable characters before exporting.
+ * On import, decoding is automatically performed before the data is given
+ * to the import() callback if any.
+ *
+ * This function may only be called from within the export() callback passed
+ * to hwloc_topology_set_userdata_export_callback().
+ *
+ * The function does not take care of portability issues if the export
+ * may be reimported on a different architecture.
+ */
+HWLOC_DECLSPEC int hwloc_export_obj_userdata_base64(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length);
+
+/** \brief Set the application-specific callback for importing userdata
+ *
+ * On XML import, userdata is ignored by default because hwloc does not know
+ * how to store it in memory.
+ *
+ * This function lets applications set \p import_cb to a callback function
+ * that will get the XML-stored userdata and store it in the object as expected
+ * by the application.
+ *
+ * \p import_cb is called during hwloc_topology_load() as many times as
+ * hwloc_export_obj_userdata() was called during export. The topology
+ * is not entirely setup yet. Object attributes are ready to consult,
+ * but links between objects are not.
+ *
+ * \p import_cb may be \c NULL if userdata should be ignored during import.
+ *
+ * \note \p buffer contains \p length characters followed by a null byte ('\0').
+ *
+ * \note This function should be called before hwloc_topology_load().
+ */
+HWLOC_DECLSPEC void hwloc_topology_set_userdata_import_callback(hwloc_topology_t topology,
+ void (*import_cb)(hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length));
+
+/** @} */
+
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+/* high-level helpers */
+#include <hwloc/helper.h>
+
+/* inline code of some functions above */
+#include <hwloc/inlines.h>
+
+/* topology diffs */
+#include <hwloc/diff.h>
+
+/* deprecated headers */
+#include <hwloc/deprecated.h>
+
+#endif /* HWLOC_H */
diff --git a/ext/hwloc/include/hwloc/autogen/config.h b/ext/hwloc/include/hwloc/autogen/config.h
new file mode 100644
index 000000000..06f5d365e
--- /dev/null
+++ b/ext/hwloc/include/hwloc/autogen/config.h
@@ -0,0 +1,191 @@
+/* include/hwloc/autogen/config.h. Generated from config.h.in by configure. */
+/* -*- c -*-
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2010 inria. All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux 1
+ * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/* The configuration file */
+
+#ifndef HWLOC_CONFIG_H
+#define HWLOC_CONFIG_H
+
+#if (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95))
+# define __hwloc_restrict __restrict
+#else
+# if __STDC_VERSION__ >= 199901L
+# define __hwloc_restrict restrict
+# else
+# define __hwloc_restrict
+# endif
+#endif
+
+/* Note that if we're compiling C++, then just use the "inline"
+ keyword, since it's part of C++ */
+#if defined(c_plusplus) || defined(__cplusplus)
+# define __hwloc_inline inline
+#elif defined(_MSC_VER) || defined(__HP_cc)
+# define __hwloc_inline __inline
+#else
+# define __hwloc_inline __inline__
+#endif
+
+/*
+ * Note: this is public. We can not assume anything from the compiler used
+ * by the application and thus the HWLOC_HAVE_* macros below are not
+ * fetched from the autoconf result here. We only automatically use a few
+ * well-known easy cases.
+ */
+
+/* Some handy constants to make the logic below a little more readable */
+#if defined(__cplusplus) && \
+ (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+#define GXX_ABOVE_3_4 1
+#else
+#define GXX_ABOVE_3_4 0
+#endif
+
+#if !defined(__cplusplus) && \
+ (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95))
+#define GCC_ABOVE_2_95 1
+#else
+#define GCC_ABOVE_2_95 0
+#endif
+
+#if !defined(__cplusplus) && \
+ (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96))
+#define GCC_ABOVE_2_96 1
+#else
+#define GCC_ABOVE_2_96 0
+#endif
+
+#if !defined(__cplusplus) && \
+ (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))
+#define GCC_ABOVE_3_3 1
+#else
+#define GCC_ABOVE_3_3 0
+#endif
+
+/* Maybe before gcc 2.95 too */
+#ifdef HWLOC_HAVE_ATTRIBUTE_UNUSED
+#define __HWLOC_HAVE_ATTRIBUTE_UNUSED HWLOC_HAVE_ATTRIBUTE_UNUSED
+#elif defined(__GNUC__)
+# define __HWLOC_HAVE_ATTRIBUTE_UNUSED (GXX_ABOVE_3_4 || GCC_ABOVE_2_95)
+#else
+# define __HWLOC_HAVE_ATTRIBUTE_UNUSED 0
+#endif
+#if __HWLOC_HAVE_ATTRIBUTE_UNUSED
+# define __hwloc_attribute_unused __attribute__((__unused__))
+#else
+# define __hwloc_attribute_unused
+#endif
+
+#ifdef HWLOC_HAVE_ATTRIBUTE_MALLOC
+#define __HWLOC_HAVE_ATTRIBUTE_MALLOC HWLOC_HAVE_ATTRIBUTE_MALLOC
+#elif defined(__GNUC__)
+# define __HWLOC_HAVE_ATTRIBUTE_MALLOC (GXX_ABOVE_3_4 || GCC_ABOVE_2_96)
+#else
+# define __HWLOC_HAVE_ATTRIBUTE_MALLOC 0
+#endif
+#if __HWLOC_HAVE_ATTRIBUTE_MALLOC
+# define __hwloc_attribute_malloc __attribute__((__malloc__))
+#else
+# define __hwloc_attribute_malloc
+#endif
+
+#ifdef HWLOC_HAVE_ATTRIBUTE_CONST
+#define __HWLOC_HAVE_ATTRIBUTE_CONST HWLOC_HAVE_ATTRIBUTE_CONST
+#elif defined(__GNUC__)
+# define __HWLOC_HAVE_ATTRIBUTE_CONST (GXX_ABOVE_3_4 || GCC_ABOVE_2_95)
+#else
+# define __HWLOC_HAVE_ATTRIBUTE_CONST 0
+#endif
+#if __HWLOC_HAVE_ATTRIBUTE_CONST
+# define __hwloc_attribute_const __attribute__((__const__))
+#else
+# define __hwloc_attribute_const
+#endif
+
+#ifdef HWLOC_HAVE_ATTRIBUTE_PURE
+#define __HWLOC_HAVE_ATTRIBUTE_PURE HWLOC_HAVE_ATTRIBUTE_PURE
+#elif defined(__GNUC__)
+# define __HWLOC_HAVE_ATTRIBUTE_PURE (GXX_ABOVE_3_4 || GCC_ABOVE_2_96)
+#else
+# define __HWLOC_HAVE_ATTRIBUTE_PURE 0
+#endif
+#if __HWLOC_HAVE_ATTRIBUTE_PURE
+# define __hwloc_attribute_pure __attribute__((__pure__))
+#else
+# define __hwloc_attribute_pure
+#endif
+
+#ifdef HWLOC_HAVE_ATTRIBUTE_DEPRECATED
+#define __HWLOC_HAVE_ATTRIBUTE_DEPRECATED HWLOC_HAVE_ATTRIBUTE_DEPRECATED
+#elif defined(__GNUC__)
+# define __HWLOC_HAVE_ATTRIBUTE_DEPRECATED (GXX_ABOVE_3_4 || GCC_ABOVE_3_3)
+#else
+# define __HWLOC_HAVE_ATTRIBUTE_DEPRECATED 0
+#endif
+#if __HWLOC_HAVE_ATTRIBUTE_DEPRECATED
+# define __hwloc_attribute_deprecated __attribute__((__deprecated__))
+#else
+# define __hwloc_attribute_deprecated
+#endif
+
+#ifdef HWLOC_C_HAVE_VISIBILITY
+# if HWLOC_C_HAVE_VISIBILITY
+# define HWLOC_DECLSPEC __attribute__((__visibility__("default")))
+# else
+# define HWLOC_DECLSPEC
+# endif
+#else
+# define HWLOC_DECLSPEC
+#endif
+
+/* Defined to 1 on Linux */
+#define HWLOC_LINUX_SYS 1
+
+/* Defined to 1 if the CPU_SET macro works */
+#define HWLOC_HAVE_CPU_SET 1
+
+/* Defined to 1 if you have the `windows.h' header. */
+/* #undef HWLOC_HAVE_WINDOWS_H */
+#define hwloc_pid_t pid_t
+#define hwloc_thread_t pthread_t
+
+#ifdef HWLOC_HAVE_WINDOWS_H
+
+# include <windows.h>
+typedef DWORDLONG hwloc_uint64_t;
+
+#else /* HWLOC_HAVE_WINDOWS_H */
+
+# ifdef hwloc_thread_t
+# include <pthread.h>
+# endif /* hwloc_thread_t */
+
+/* Defined to 1 if you have the <stdint.h> header file. */
+# define HWLOC_HAVE_STDINT_H 1
+
+# include <sys/types.h>
+# ifdef HWLOC_HAVE_STDINT_H
+# include <stdint.h>
+# endif
+typedef uint64_t hwloc_uint64_t;
+
+#endif /* HWLOC_HAVE_WINDOWS_H */
+
+/* Whether we need to re-define all the hwloc public symbols or not */
+#define HWLOC_SYM_TRANSFORM 0
+
+/* The hwloc symbol prefix */
+#define HWLOC_SYM_PREFIX hwloc_
+
+#define HWLOC_HAVE_PCIUTILS 1
+
+/* The hwloc symbol prefix in all caps */
+#define HWLOC_SYM_PREFIX_CAPS HWLOC_
+
+#endif /* HWLOC_CONFIG_H */
diff --git a/ext/hwloc/include/hwloc/autogen/config.h.in b/ext/hwloc/include/hwloc/autogen/config.h.in
new file mode 100644
index 000000000..a30af0c2d
--- /dev/null
+++ b/ext/hwloc/include/hwloc/autogen/config.h.in
@@ -0,0 +1,188 @@
+/* -*- c -*-
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2010 inria. All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux 1
+ * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/* The configuration file */
+
+#ifndef HWLOC_CONFIG_H
+#define HWLOC_CONFIG_H
+
+#if (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95))
+# define __hwloc_restrict __restrict
+#else
+# if __STDC_VERSION__ >= 199901L
+# define __hwloc_restrict restrict
+# else
+# define __hwloc_restrict
+# endif
+#endif
+
+/* Note that if we're compiling C++, then just use the "inline"
+ keyword, since it's part of C++ */
+#if defined(c_plusplus) || defined(__cplusplus)
+# define __hwloc_inline inline
+#elif defined(_MSC_VER) || defined(__HP_cc)
+# define __hwloc_inline __inline
+#else
+# define __hwloc_inline __inline__
+#endif
+
+/*
+ * Note: this is public. We can not assume anything from the compiler used
+ * by the application and thus the HWLOC_HAVE_* macros below are not
+ * fetched from the autoconf result here. We only automatically use a few
+ * well-known easy cases.
+ */
+
+/* Some handy constants to make the logic below a little more readable */
+#if defined(__cplusplus) && \
+ (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+#define GXX_ABOVE_3_4 1
+#else
+#define GXX_ABOVE_3_4 0
+#endif
+
+#if !defined(__cplusplus) && \
+ (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 95))
+#define GCC_ABOVE_2_95 1
+#else
+#define GCC_ABOVE_2_95 0
+#endif
+
+#if !defined(__cplusplus) && \
+ (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96))
+#define GCC_ABOVE_2_96 1
+#else
+#define GCC_ABOVE_2_96 0
+#endif
+
+#if !defined(__cplusplus) && \
+ (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))
+#define GCC_ABOVE_3_3 1
+#else
+#define GCC_ABOVE_3_3 0
+#endif
+
+/* Maybe before gcc 2.95 too */
+#ifdef HWLOC_HAVE_ATTRIBUTE_UNUSED
+#define __HWLOC_HAVE_ATTRIBUTE_UNUSED HWLOC_HAVE_ATTRIBUTE_UNUSED
+#elif defined(__GNUC__)
+# define __HWLOC_HAVE_ATTRIBUTE_UNUSED (GXX_ABOVE_3_4 || GCC_ABOVE_2_95)
+#else
+# define __HWLOC_HAVE_ATTRIBUTE_UNUSED 0
+#endif
+#if __HWLOC_HAVE_ATTRIBUTE_UNUSED
+# define __hwloc_attribute_unused __attribute__((__unused__))
+#else
+# define __hwloc_attribute_unused
+#endif
+
+#ifdef HWLOC_HAVE_ATTRIBUTE_MALLOC
+#define __HWLOC_HAVE_ATTRIBUTE_MALLOC HWLOC_HAVE_ATTRIBUTE_MALLOC
+#elif defined(__GNUC__)
+# define __HWLOC_HAVE_ATTRIBUTE_MALLOC (GXX_ABOVE_3_4 || GCC_ABOVE_2_96)
+#else
+# define __HWLOC_HAVE_ATTRIBUTE_MALLOC 0
+#endif
+#if __HWLOC_HAVE_ATTRIBUTE_MALLOC
+# define __hwloc_attribute_malloc __attribute__((__malloc__))
+#else
+# define __hwloc_attribute_malloc
+#endif
+
+#ifdef HWLOC_HAVE_ATTRIBUTE_CONST
+#define __HWLOC_HAVE_ATTRIBUTE_CONST HWLOC_HAVE_ATTRIBUTE_CONST
+#elif defined(__GNUC__)
+# define __HWLOC_HAVE_ATTRIBUTE_CONST (GXX_ABOVE_3_4 || GCC_ABOVE_2_95)
+#else
+# define __HWLOC_HAVE_ATTRIBUTE_CONST 0
+#endif
+#if __HWLOC_HAVE_ATTRIBUTE_CONST
+# define __hwloc_attribute_const __attribute__((__const__))
+#else
+# define __hwloc_attribute_const
+#endif
+
+#ifdef HWLOC_HAVE_ATTRIBUTE_PURE
+#define __HWLOC_HAVE_ATTRIBUTE_PURE HWLOC_HAVE_ATTRIBUTE_PURE
+#elif defined(__GNUC__)
+# define __HWLOC_HAVE_ATTRIBUTE_PURE (GXX_ABOVE_3_4 || GCC_ABOVE_2_96)
+#else
+# define __HWLOC_HAVE_ATTRIBUTE_PURE 0
+#endif
+#if __HWLOC_HAVE_ATTRIBUTE_PURE
+# define __hwloc_attribute_pure __attribute__((__pure__))
+#else
+# define __hwloc_attribute_pure
+#endif
+
+#ifdef HWLOC_HAVE_ATTRIBUTE_DEPRECATED
+#define __HWLOC_HAVE_ATTRIBUTE_DEPRECATED HWLOC_HAVE_ATTRIBUTE_DEPRECATED
+#elif defined(__GNUC__)
+# define __HWLOC_HAVE_ATTRIBUTE_DEPRECATED (GXX_ABOVE_3_4 || GCC_ABOVE_3_3)
+#else
+# define __HWLOC_HAVE_ATTRIBUTE_DEPRECATED 0
+#endif
+#if __HWLOC_HAVE_ATTRIBUTE_DEPRECATED
+# define __hwloc_attribute_deprecated __attribute__((__deprecated__))
+#else
+# define __hwloc_attribute_deprecated
+#endif
+
+#ifdef HWLOC_C_HAVE_VISIBILITY
+# if HWLOC_C_HAVE_VISIBILITY
+# define HWLOC_DECLSPEC __attribute__((__visibility__("default")))
+# else
+# define HWLOC_DECLSPEC
+# endif
+#else
+# define HWLOC_DECLSPEC
+#endif
+
+/* Defined to 1 on Linux */
+#undef HWLOC_LINUX_SYS
+
+/* Defined to 1 if the CPU_SET macro works */
+#undef HWLOC_HAVE_CPU_SET
+
+/* Defined to 1 if you have the `windows.h' header. */
+#undef HWLOC_HAVE_WINDOWS_H
+#undef hwloc_pid_t
+#undef hwloc_thread_t
+
+#ifdef HWLOC_HAVE_WINDOWS_H
+
+# include <windows.h>
+typedef DWORDLONG hwloc_uint64_t;
+
+#else /* HWLOC_HAVE_WINDOWS_H */
+
+# ifdef hwloc_thread_t
+# include <pthread.h>
+# endif /* hwloc_thread_t */
+
+/* Defined to 1 if you have the <stdint.h> header file. */
+# undef HWLOC_HAVE_STDINT_H
+
+# include <sys/types.h>
+# ifdef HWLOC_HAVE_STDINT_H
+# include <stdint.h>
+# endif
+typedef uint64_t hwloc_uint64_t;
+
+#endif /* HWLOC_HAVE_WINDOWS_H */
+
+/* Whether we need to re-define all the hwloc public symbols or not */
+#undef HWLOC_SYM_TRANSFORM
+
+/* The hwloc symbol prefix */
+#undef HWLOC_SYM_PREFIX
+
+/* The hwloc symbol prefix in all caps */
+#undef HWLOC_SYM_PREFIX_CAPS
+
+#endif /* HWLOC_CONFIG_H */
diff --git a/ext/hwloc/include/hwloc/autogen/stamp-h2 b/ext/hwloc/include/hwloc/autogen/stamp-h2
new file mode 100644
index 000000000..804e0acce
--- /dev/null
+++ b/ext/hwloc/include/hwloc/autogen/stamp-h2
@@ -0,0 +1 @@
+timestamp for include/hwloc/autogen/config.h
diff --git a/ext/hwloc/include/hwloc/bitmap.h b/ext/hwloc/include/hwloc/bitmap.h
new file mode 100644
index 000000000..adf589b84
--- /dev/null
+++ b/ext/hwloc/include/hwloc/bitmap.h
@@ -0,0 +1,350 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2011 inria. All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux 1
+ * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief The bitmap API, for use in hwloc itself.
+ */
+
+#ifndef HWLOC_BITMAP_H
+#define HWLOC_BITMAP_H
+
+#include <hwloc/autogen/config.h>
+#include <sys/types.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_bitmap The bitmap API
+ *
+ * The ::hwloc_bitmap_t type represents a set of objects, typically OS
+ * processors -- which may actually be hardware threads (represented
+ * by ::hwloc_cpuset_t, which is a typedef for ::hwloc_bitmap_t) -- or
+ * memory nodes (represented by ::hwloc_nodeset_t, which is also a
+ * typedef for ::hwloc_bitmap_t).
+ *
+ * Both CPU and node sets are always indexed by OS physical number.
+ *
+ * \note CPU sets and nodesets are described in \ref hwlocality_object_sets.
+ *
+ * A bitmap may be of infinite size.
+ * @{
+ */
+
+
+/** \brief
+ * Set of bits represented as an opaque pointer to an internal bitmap.
+ */
+typedef struct hwloc_bitmap_s * hwloc_bitmap_t;
+/** \brief a non-modifiable ::hwloc_bitmap_t */
+typedef const struct hwloc_bitmap_s * hwloc_const_bitmap_t;
+
+
+/*
+ * Bitmap allocation, freeing and copying.
+ */
+
+/** \brief Allocate a new empty bitmap.
+ *
+ * \returns A valid bitmap or \c NULL.
+ *
+ * The bitmap should be freed by a corresponding call to
+ * hwloc_bitmap_free().
+ */
+HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc(void) __hwloc_attribute_malloc;
+
+/** \brief Allocate a new full bitmap. */
+HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc_full(void) __hwloc_attribute_malloc;
+
+/** \brief Free bitmap \p bitmap.
+ *
+ * If \p bitmap is \c NULL, no operation is performed.
+ */
+HWLOC_DECLSPEC void hwloc_bitmap_free(hwloc_bitmap_t bitmap);
+
+/** \brief Duplicate bitmap \p bitmap by allocating a new bitmap and copying \p bitmap contents.
+ *
+ * If \p bitmap is \c NULL, \c NULL is returned.
+ */
+HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_dup(hwloc_const_bitmap_t bitmap) __hwloc_attribute_malloc;
+
+/** \brief Copy the contents of bitmap \p src into the already allocated bitmap \p dst */
+HWLOC_DECLSPEC void hwloc_bitmap_copy(hwloc_bitmap_t dst, hwloc_const_bitmap_t src);
+
+
+/*
+ * Bitmap/String Conversion
+ */
+
+/** \brief Stringify a bitmap.
+ *
+ * Up to \p buflen characters may be written in buffer \p buf.
+ *
+ * If \p buflen is 0, \p buf may safely be \c NULL.
+ *
+ * \return the number of character that were actually written if not truncating,
+ * or that would have been written (not including the ending \\0).
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
+
+/** \brief Stringify a bitmap into a newly allocated string.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
+
+/** \brief Parse a bitmap string and stores it in bitmap \p bitmap.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
+
+/** \brief Stringify a bitmap in the list format.
+ *
+ * Lists are comma-separated indexes or ranges.
+ * Ranges are dash separated indexes.
+ * The last range may not have an ending index if the bitmap is infinite.
+ *
+ * Up to \p buflen characters may be written in buffer \p buf.
+ *
+ * If \p buflen is 0, \p buf may safely be \c NULL.
+ *
+ * \return the number of character that were actually written if not truncating,
+ * or that would have been written (not including the ending \\0).
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_list_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
+
+/** \brief Stringify a bitmap into a newly allocated list string.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_list_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
+
+/** \brief Parse a list string and stores it in bitmap \p bitmap.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_list_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
+
+/** \brief Stringify a bitmap in the taskset-specific format.
+ *
+ * The taskset command manipulates bitmap strings that contain a single
+ * (possibly very long) hexadecimal number starting with 0x.
+ *
+ * Up to \p buflen characters may be written in buffer \p buf.
+ *
+ * If \p buflen is 0, \p buf may safely be \c NULL.
+ *
+ * \return the number of character that were actually written if not truncating,
+ * or that would have been written (not including the ending \\0).
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
+
+/** \brief Stringify a bitmap into a newly allocated taskset-specific string.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_taskset_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
+
+/** \brief Parse a taskset-specific bitmap string and stores it in bitmap \p bitmap.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_taskset_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
+
+
+/*
+ * Building bitmaps.
+ */
+
+/** \brief Empty the bitmap \p bitmap */
+HWLOC_DECLSPEC void hwloc_bitmap_zero(hwloc_bitmap_t bitmap);
+
+/** \brief Fill bitmap \p bitmap with all possible indexes (even if those objects don't exist or are otherwise unavailable) */
+HWLOC_DECLSPEC void hwloc_bitmap_fill(hwloc_bitmap_t bitmap);
+
+/** \brief Empty the bitmap \p bitmap and add bit \p id */
+HWLOC_DECLSPEC void hwloc_bitmap_only(hwloc_bitmap_t bitmap, unsigned id);
+
+/** \brief Fill the bitmap \p and clear the index \p id */
+HWLOC_DECLSPEC void hwloc_bitmap_allbut(hwloc_bitmap_t bitmap, unsigned id);
+
+/** \brief Setup bitmap \p bitmap from unsigned long \p mask */
+HWLOC_DECLSPEC void hwloc_bitmap_from_ulong(hwloc_bitmap_t bitmap, unsigned long mask);
+
+/** \brief Setup bitmap \p bitmap from unsigned long \p mask used as \p i -th subset */
+HWLOC_DECLSPEC void hwloc_bitmap_from_ith_ulong(hwloc_bitmap_t bitmap, unsigned i, unsigned long mask);
+
+
+/*
+ * Modifying bitmaps.
+ */
+
+/** \brief Add index \p id in bitmap \p bitmap */
+HWLOC_DECLSPEC void hwloc_bitmap_set(hwloc_bitmap_t bitmap, unsigned id);
+
+/** \brief Add indexes from \p begin to \p end in bitmap \p bitmap.
+ *
+ * If \p end is \c -1, the range is infinite.
+ */
+HWLOC_DECLSPEC void hwloc_bitmap_set_range(hwloc_bitmap_t bitmap, unsigned begin, int end);
+
+/** \brief Replace \p i -th subset of bitmap \p bitmap with unsigned long \p mask */
+HWLOC_DECLSPEC void hwloc_bitmap_set_ith_ulong(hwloc_bitmap_t bitmap, unsigned i, unsigned long mask);
+
+/** \brief Remove index \p id from bitmap \p bitmap */
+HWLOC_DECLSPEC void hwloc_bitmap_clr(hwloc_bitmap_t bitmap, unsigned id);
+
+/** \brief Remove indexes from \p begin to \p end in bitmap \p bitmap.
+ *
+ * If \p end is \c -1, the range is infinite.
+ */
+HWLOC_DECLSPEC void hwloc_bitmap_clr_range(hwloc_bitmap_t bitmap, unsigned begin, int end);
+
+/** \brief Keep a single index among those set in bitmap \p bitmap
+ *
+ * May be useful before binding so that the process does not
+ * have a chance of migrating between multiple logical CPUs
+ * in the original mask.
+ */
+HWLOC_DECLSPEC void hwloc_bitmap_singlify(hwloc_bitmap_t bitmap);
+
+
+/*
+ * Consulting bitmaps.
+ */
+
+/** \brief Convert the beginning part of bitmap \p bitmap into unsigned long \p mask */
+HWLOC_DECLSPEC unsigned long hwloc_bitmap_to_ulong(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
+
+/** \brief Convert the \p i -th subset of bitmap \p bitmap into unsigned long mask */
+HWLOC_DECLSPEC unsigned long hwloc_bitmap_to_ith_ulong(hwloc_const_bitmap_t bitmap, unsigned i) __hwloc_attribute_pure;
+
+/** \brief Test whether index \p id is part of bitmap \p bitmap */
+HWLOC_DECLSPEC int hwloc_bitmap_isset(hwloc_const_bitmap_t bitmap, unsigned id) __hwloc_attribute_pure;
+
+/** \brief Test whether bitmap \p bitmap is empty */
+HWLOC_DECLSPEC int hwloc_bitmap_iszero(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
+
+/** \brief Test whether bitmap \p bitmap is completely full */
+HWLOC_DECLSPEC int hwloc_bitmap_isfull(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
+
+/** \brief Compute the first index (least significant bit) in bitmap \p bitmap
+ *
+ * \return -1 if no index is set.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_first(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
+
+/** \brief Compute the next index in bitmap \p bitmap which is after index \p prev
+ *
+ * If \p prev is -1, the first index is returned.
+ *
+ * \return -1 if no index with higher index is set in bitmap.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_next(hwloc_const_bitmap_t bitmap, int prev) __hwloc_attribute_pure;
+
+/** \brief Compute the last index (most significant bit) in bitmap \p bitmap
+ *
+ * \return -1 if no index is set, or if the set of indexes is infinite.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_last(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
+
+/** \brief Compute the "weight" of bitmap \p bitmap (i.e., number of
+ * indexes that are in the bitmap).
+ *
+ * \return the number of indexes that are in the bitmap.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_weight(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
+
+/** \brief Loop macro iterating on bitmap \p bitmap
+ * \hideinitializer
+ *
+ * \p index is the loop variable; it should be an unsigned int. The
+ * first iteration will set \p index to the lowest index in the bitmap.
+ * Successive iterations will iterate through, in order, all remaining
+ * indexes that are in the bitmap. To be specific: each iteration will return a
+ * value for \p index such that hwloc_bitmap_isset(bitmap, index) is true.
+ *
+ * The assert prevents the loop from being infinite if the bitmap is infinite.
+ */
+#define hwloc_bitmap_foreach_begin(id, bitmap) \
+do { \
+ assert(hwloc_bitmap_weight(bitmap) != -1); \
+ for (id = hwloc_bitmap_first(bitmap); \
+ (unsigned) id != (unsigned) -1; \
+ id = hwloc_bitmap_next(bitmap, id)) { \
+/** \brief End of loop. Needs a terminating ';'.
+ * \hideinitializer
+ *
+ * \sa hwloc_bitmap_foreach_begin */
+#define hwloc_bitmap_foreach_end() \
+ } \
+} while (0)
+
+
+/*
+ * Combining bitmaps.
+ */
+
+/** \brief Or bitmaps \p bitmap1 and \p bitmap2 and store the result in bitmap \p res
+ *
+ * \p res can be the same as \p bitmap1 or \p bitmap2
+ */
+HWLOC_DECLSPEC void hwloc_bitmap_or (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2);
+
+/** \brief And bitmaps \p bitmap1 and \p bitmap2 and store the result in bitmap \p res
+ *
+ * \p res can be the same as \p bitmap1 or \p bitmap2
+ */
+HWLOC_DECLSPEC void hwloc_bitmap_and (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2);
+
+/** \brief And bitmap \p bitmap1 and the negation of \p bitmap2 and store the result in bitmap \p res
+ *
+ * \p res can be the same as \p bitmap1 or \p bitmap2
+ */
+HWLOC_DECLSPEC void hwloc_bitmap_andnot (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2);
+
+/** \brief Xor bitmaps \p bitmap1 and \p bitmap2 and store the result in bitmap \p res
+ *
+ * \p res can be the same as \p bitmap1 or \p bitmap2
+ */
+HWLOC_DECLSPEC void hwloc_bitmap_xor (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2);
+
+/** \brief Negate bitmap \p bitmap and store the result in bitmap \p res
+ *
+ * \p res can be the same as \p bitmap
+ */
+HWLOC_DECLSPEC void hwloc_bitmap_not (hwloc_bitmap_t res, hwloc_const_bitmap_t bitmap);
+
+
+/*
+ * Comparing bitmaps.
+ */
+
+/** \brief Test whether bitmaps \p bitmap1 and \p bitmap2 intersects */
+HWLOC_DECLSPEC int hwloc_bitmap_intersects (hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;
+
+/** \brief Test whether bitmap \p sub_bitmap is part of bitmap \p super_bitmap */
+HWLOC_DECLSPEC int hwloc_bitmap_isincluded (hwloc_const_bitmap_t sub_bitmap, hwloc_const_bitmap_t super_bitmap) __hwloc_attribute_pure;
+
+/** \brief Test whether bitmap \p bitmap1 is equal to bitmap \p bitmap2 */
+HWLOC_DECLSPEC int hwloc_bitmap_isequal (hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;
+
+/** \brief Compare bitmaps \p bitmap1 and \p bitmap2 using their lowest index.
+ *
+ * Smaller least significant bit is smaller.
+ * The empty bitmap is considered higher than anything.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_compare_first(hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;
+
+/** \brief Compare bitmaps \p bitmap1 and \p bitmap2 using their highest index.
+ *
+ * Higher most significant bit is higher.
+ * The empty bitmap is considered lower than anything.
+ */
+HWLOC_DECLSPEC int hwloc_bitmap_compare(hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_BITMAP_H */
diff --git a/ext/hwloc/include/hwloc/cuda.h b/ext/hwloc/include/hwloc/cuda.h
new file mode 100644
index 000000000..25201689e
--- /dev/null
+++ b/ext/hwloc/include/hwloc/cuda.h
@@ -0,0 +1,224 @@
+/*
+ * Copyright © 2010-2013 Inria. All rights reserved.
+ * Copyright © 2010-2011 Université Bordeaux 1
+ * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and the CUDA Driver API.
+ *
+ * Applications that use both hwloc and the CUDA Driver API may want to
+ * include this file so as to get topology information for CUDA devices.
+ *
+ */
+
+#ifndef HWLOC_CUDA_H
+#define HWLOC_CUDA_H
+
+#include <hwloc.h>
+#include <hwloc/autogen/config.h>
+#include <hwloc/helper.h>
+#ifdef HWLOC_LINUX_SYS
+#include <hwloc/linux.h>
+#endif
+
+#include <cuda.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_cuda Interoperability with the CUDA Driver API
+ *
+ * This interface offers ways to retrieve topology information about
+ * CUDA devices when using the CUDA Driver API.
+ *
+ * @{
+ */
+
+/** \brief Return the domain, bus and device IDs of the CUDA device \p cudevice.
+ *
+ * Device \p cudevice must match the local machine.
+ */
+static __hwloc_inline int
+hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused,
+ CUdevice cudevice, int *domain, int *bus, int *dev)
+{
+ CUresult cres;
+
+#ifdef CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID
+ cres = cuDeviceGetAttribute(domain, CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, cudevice);
+ if (cres != CUDA_SUCCESS) {
+ errno = ENOSYS;
+ return -1;
+ }
+#else
+ *domain = 0;
+#endif
+ cres = cuDeviceGetAttribute(bus, CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, cudevice);
+ if (cres != CUDA_SUCCESS) {
+ errno = ENOSYS;
+ return -1;
+ }
+ cres = cuDeviceGetAttribute(dev, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, cudevice);
+ if (cres != CUDA_SUCCESS) {
+ errno = ENOSYS;
+ return -1;
+ }
+
+ return 0;
+}
+
+/** \brief Get the CPU set of logical processors that are physically
+ * close to device \p cudevice.
+ *
+ * Return the CPU set describing the locality of the CUDA device \p cudevice.
+ *
+ * Topology \p topology and device \p cudevice must match the local machine.
+ * I/O devices detection and the CUDA component are not needed in the topology.
+ *
+ * The function only returns the locality of the device.
+ * If more information about the device is needed, OS objects should
+ * be used instead, see hwloc_cuda_get_device_osdev()
+ * and hwloc_cuda_get_device_osdev_by_index().
+ *
+ * This function is currently only implemented in a meaningful way for
+ * Linux; other systems will simply get a full cpuset.
+ */
+static __hwloc_inline int
+hwloc_cuda_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
+ CUdevice cudevice, hwloc_cpuset_t set)
+{
+#ifdef HWLOC_LINUX_SYS
+ /* If we're on Linux, use the sysfs mechanism to get the local cpus */
+#define HWLOC_CUDA_DEVICE_SYSFS_PATH_MAX 128
+ char path[HWLOC_CUDA_DEVICE_SYSFS_PATH_MAX];
+ FILE *sysfile = NULL;
+ int domainid, busid, deviceid;
+
+ if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domainid, &busid, &deviceid))
+ return -1;
+
+ if (!hwloc_topology_is_thissystem(topology)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", domainid, busid, deviceid);
+ sysfile = fopen(path, "r");
+ if (!sysfile)
+ return -1;
+
+ hwloc_linux_parse_cpumap_file(sysfile, set);
+ if (hwloc_bitmap_iszero(set))
+ hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+
+ fclose(sysfile);
+#else
+ /* Non-Linux systems simply get a full cpuset */
+ hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+#endif
+ return 0;
+}
+
+/** \brief Get the hwloc PCI device object corresponding to the
+ * CUDA device \p cudevice.
+ *
+ * Return the PCI device object describing the CUDA device \p cudevice.
+ * Return NULL if there is none.
+ *
+ * Topology \p topology and device \p cudevice must match the local machine.
+ * I/O devices detection must be enabled in topology \p topology.
+ * The CUDA component is not needed in the topology.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_cuda_get_device_pcidev(hwloc_topology_t topology, CUdevice cudevice)
+{
+ int domain, bus, dev;
+
+ if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domain, &bus, &dev))
+ return NULL;
+
+ return hwloc_get_pcidev_by_busid(topology, domain, bus, dev, 0);
+}
+
+/** \brief Get the hwloc OS device object corresponding to CUDA device \p cudevice.
+ *
+ * Return the hwloc OS device object that describes the given
+ * CUDA device \p cudevice. Return NULL if there is none.
+ *
+ * Topology \p topology and device \p cudevice must match the local machine.
+ * I/O devices detection and the NVML component must be enabled in the topology.
+ * If not, the locality of the object may still be found using
+ * hwloc_cuda_get_device_cpuset().
+ *
+ * \note The corresponding hwloc PCI device may be found by looking
+ * at the result parent pointer.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_cuda_get_device_osdev(hwloc_topology_t topology, CUdevice cudevice)
+{
+ hwloc_obj_t osdev = NULL;
+ int domain, bus, dev;
+
+ if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domain, &bus, &dev))
+ return NULL;
+
+ osdev = NULL;
+ while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+ hwloc_obj_t pcidev = osdev->parent;
+ if (strncmp(osdev->name, "cuda", 4))
+ continue;
+ if (pcidev
+ && pcidev->type == HWLOC_OBJ_PCI_DEVICE
+ && (int) pcidev->attr->pcidev.domain == domain
+ && (int) pcidev->attr->pcidev.bus == bus
+ && (int) pcidev->attr->pcidev.dev == dev
+ && pcidev->attr->pcidev.func == 0)
+ return osdev;
+ }
+
+ return NULL;
+}
+
+/** \brief Get the hwloc OS device object corresponding to the
+ * CUDA device whose index is \p idx.
+ *
+ * Return the OS device object describing the CUDA device whose
+ * index is \p idx. Return NULL if there is none.
+ *
+ * The topology \p topology does not necessarily have to match the current
+ * machine. For instance the topology may be an XML import of a remote host.
+ * I/O devices detection and the CUDA component must be enabled in the topology.
+ *
+ * \note The corresponding PCI device object can be obtained by looking
+ * at the OS device parent object.
+ *
+ * \note This function is identical to hwloc_cudart_get_device_osdev_by_index().
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_cuda_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx)
+{
+ hwloc_obj_t osdev = NULL;
+ while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+ if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type
+ && osdev->name
+ && !strncmp("cuda", osdev->name, 4)
+ && atoi(osdev->name + 4) == (int) idx)
+ return osdev;
+ }
+ return NULL;
+}
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_CUDA_H */
diff --git a/ext/hwloc/include/hwloc/cudart.h b/ext/hwloc/include/hwloc/cudart.h
new file mode 100644
index 000000000..606d2d075
--- /dev/null
+++ b/ext/hwloc/include/hwloc/cudart.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright © 2010-2013 Inria. All rights reserved.
+ * Copyright © 2010-2011 Université Bordeaux 1
+ * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and the CUDA Runtime API.
+ *
+ * Applications that use both hwloc and the CUDA Runtime API may want to
+ * include this file so as to get topology information for CUDA devices.
+ *
+ */
+
+#ifndef HWLOC_CUDART_H
+#define HWLOC_CUDART_H
+
+#include <hwloc.h>
+#include <hwloc/autogen/config.h>
+#include <hwloc/helper.h>
+#ifdef HWLOC_LINUX_SYS
+#include <hwloc/linux.h>
+#endif
+
+#include <cuda_runtime_api.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_cudart Interoperability with the CUDA Runtime API
+ *
+ * This interface offers ways to retrieve topology information about
+ * CUDA devices when using the CUDA Runtime API.
+ *
+ * @{
+ */
+
+/** \brief Return the domain, bus and device IDs of the CUDA device whose index is \p idx.
+ *
+ * Device index \p idx must match the local machine.
+ */
+static __hwloc_inline int
+hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused,
+ int idx, int *domain, int *bus, int *dev)
+{
+ cudaError_t cerr;
+ struct cudaDeviceProp prop;
+
+ cerr = cudaGetDeviceProperties(&prop, idx);
+ if (cerr) {
+ errno = ENOSYS;
+ return -1;
+ }
+
+#ifdef CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID
+ *domain = prop.pciDomainID;
+#else
+ *domain = 0;
+#endif
+
+ *bus = prop.pciBusID;
+ *dev = prop.pciDeviceID;
+
+ return 0;
+}
+
+/** \brief Get the CPU set of logical processors that are physically
+ * close to device \p idx.
+ *
+ * Return the CPU set describing the locality of the CUDA device
+ * whose index is \p idx.
+ *
+ * Topology \p topology and device \p idx must match the local machine.
+ * I/O devices detection and the CUDA component are not needed in the topology.
+ *
+ * The function only returns the locality of the device.
+ * If more information about the device is needed, OS objects should
+ * be used instead, see hwloc_cudart_get_device_osdev_by_index().
+ *
+ * This function is currently only implemented in a meaningful way for
+ * Linux; other systems will simply get a full cpuset.
+ */
+static __hwloc_inline int
+hwloc_cudart_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
+ int idx, hwloc_cpuset_t set)
+{
+#ifdef HWLOC_LINUX_SYS
+ /* If we're on Linux, use the sysfs mechanism to get the local cpus */
+#define HWLOC_CUDART_DEVICE_SYSFS_PATH_MAX 128
+ char path[HWLOC_CUDART_DEVICE_SYSFS_PATH_MAX];
+ FILE *sysfile = NULL;
+ int domain, bus, dev;
+
+ if (hwloc_cudart_get_device_pci_ids(topology, idx, &domain, &bus, &dev))
+ return -1;
+
+ if (!hwloc_topology_is_thissystem(topology)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", domain, bus, dev);
+ sysfile = fopen(path, "r");
+ if (!sysfile)
+ return -1;
+
+ hwloc_linux_parse_cpumap_file(sysfile, set);
+ if (hwloc_bitmap_iszero(set))
+ hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+
+ fclose(sysfile);
+#else
+ /* Non-Linux systems simply get a full cpuset */
+ hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+#endif
+ return 0;
+}
+
+/** \brief Get the hwloc PCI device object corresponding to the
+ * CUDA device whose index is \p idx.
+ *
+ * Return the PCI device object describing the CUDA device whose
+ * index is \p idx. Return NULL if there is none.
+ *
+ * Topology \p topology and device \p idx must match the local machine.
+ * I/O devices detection must be enabled in topology \p topology.
+ * The CUDA component is not needed in the topology.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_cudart_get_device_pcidev(hwloc_topology_t topology, int idx)
+{
+ int domain, bus, dev;
+
+ if (hwloc_cudart_get_device_pci_ids(topology, idx, &domain, &bus, &dev))
+ return NULL;
+
+ return hwloc_get_pcidev_by_busid(topology, domain, bus, dev, 0);
+}
+
+/** \brief Get the hwloc OS device object corresponding to the
+ * CUDA device whose index is \p idx.
+ *
+ * Return the OS device object describing the CUDA device whose
+ * index is \p idx. Return NULL if there is none.
+ *
+ * The topology \p topology does not necessarily have to match the current
+ * machine. For instance the topology may be an XML import of a remote host.
+ * I/O devices detection and the CUDA component must be enabled in the topology.
+ * If not, the locality of the object may still be found using
+ * hwloc_cudart_get_device_cpuset().
+ *
+ * \note The corresponding PCI device object can be obtained by looking
+ * at the OS device parent object.
+ *
+ * \note This function is identical to hwloc_cuda_get_device_osdev_by_index().
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_cudart_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx)
+{
+ hwloc_obj_t osdev = NULL;
+ while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+ if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type
+ && osdev->name
+ && !strncmp("cuda", osdev->name, 4)
+ && atoi(osdev->name + 4) == (int) idx)
+ return osdev;
+ }
+ return NULL;
+}
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_CUDART_H */
diff --git a/ext/hwloc/include/hwloc/deprecated.h b/ext/hwloc/include/hwloc/deprecated.h
new file mode 100644
index 000000000..544ca8f0a
--- /dev/null
+++ b/ext/hwloc/include/hwloc/deprecated.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2013 Inria. All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux 1
+ * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/**
+ * This file contains the inline code of functions declared in hwloc.h
+ */
+
+#ifndef HWLOC_DEPRECATED_H
+#define HWLOC_DEPRECATED_H
+
+#ifndef HWLOC_H
+#error Please include the main hwloc.h instead
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Stringify a given topology object into a human-readable form.
+ *
+ * \note This function is deprecated in favor of hwloc_obj_type_snprintf()
+ * and hwloc_obj_attr_snprintf() since it is not very flexible and
+ * only prints physical/OS indexes.
+ *
+ * Fill string \p string up to \p size characters with the description
+ * of topology object \p obj in topology \p topology.
+ *
+ * If \p verbose is set, a longer description is used. Otherwise a
+ * short description is used.
+ *
+ * \p indexprefix is used to prefix the \p os_index attribute number of
+ * the object in the description. If \c NULL, the \c # character is used.
+ *
+ * If \p size is 0, \p string may safely be \c NULL.
+ *
+ * \return the number of character that were actually written if not truncating,
+ * or that would have been written (not including the ending \\0).
+ */
+HWLOC_DECLSPEC int hwloc_obj_snprintf(char * __hwloc_restrict string, size_t size,
+ hwloc_topology_t topology, hwloc_obj_t obj,
+ const char * __hwloc_restrict indexprefix, int verbose);
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_DEPRECATED_H */
diff --git a/ext/hwloc/include/hwloc/diff.h b/ext/hwloc/include/hwloc/diff.h
new file mode 100644
index 000000000..59f729657
--- /dev/null
+++ b/ext/hwloc/include/hwloc/diff.h
@@ -0,0 +1,292 @@
+/*
+ * Copyright © 2013 Inria. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Topology differences.
+ */
+
+#ifndef HWLOC_DIFF_H
+#define HWLOC_DIFF_H
+
+#ifndef HWLOC_H
+#error Please include the main hwloc.h instead
+#endif
+
+
+#ifdef __cplusplus
+extern "C" {
+#elif 0
+}
+#endif
+
+
+/** \defgroup hwlocality_diff Topology differences
+ *
+ * Applications that manipulate many similar topologies, for instance
+ * one for each node of a homogeneous cluster, may want to compress
+ * topologies to reduce the memory footprint.
+ *
+ * This file offers a way to manipulate the difference between topologies
+ * and export/import it to/from XML.
+ * Compression may therefore be achieved by storing one topology
+ * entirely while the others are only described by their differences
+ * with the former.
+ * The actual topology can be reconstructed when actually needed by
+ * applying the precomputed difference to the reference topology.
+ *
+ * This interface targets very similar nodes.
+ * Only very simple differences between topologies are actually
+ * supported, for instance a change in the memory size, the name
+ * of the object, or some info attribute.
+ * More complex differences such as adding or removing objects cannot
+ * be represented in the difference structures and therefore return
+ * errors.
+ *
+ * @{
+ */
+
+
+/** \brief Type of one object attribute difference.
+ */
+typedef enum hwloc_topology_diff_obj_attr_type_e {
+ /** \brief The object local memory is modified.
+ * The union is a hwloc_topology_diff_obj_attr_uint64_s
+ * (and the index field is ignored).
+ */
+ HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE,
+
+ /** \brief The object name is modified.
+ * The union is a hwloc_topology_diff_obj_attr_string_s
+ * (and the name field is ignored).
+ */
+  HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME,
+
+  /** \brief The value of an info attribute is modified.
+ * The union is a hwloc_topology_diff_obj_attr_string_s.
+ */
+ HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO
+} hwloc_topology_diff_obj_attr_type_t;
+
+/** \brief One object attribute difference.
+ */
+union hwloc_topology_diff_obj_attr_u {
+ struct hwloc_topology_diff_obj_attr_generic_s {
+ /* each part of the union must start with these */
+ hwloc_topology_diff_obj_attr_type_t type;
+ } generic;
+
+ /** \brief Integer attribute modification with an optional index. */
+ struct hwloc_topology_diff_obj_attr_uint64_s {
+ /* used for storing integer attributes */
+ hwloc_topology_diff_obj_attr_type_t type;
+ hwloc_uint64_t index; /* not used for SIZE */
+ hwloc_uint64_t oldvalue;
+ hwloc_uint64_t newvalue;
+ } uint64;
+
+ /** \brief String attribute modification with an optional name */
+ struct hwloc_topology_diff_obj_attr_string_s {
+ /* used for storing name and info pairs */
+ hwloc_topology_diff_obj_attr_type_t type;
+ char *name; /* not used for NAME */
+ char *oldvalue;
+ char *newvalue;
+ } string;
+};
+
+
+/** \brief Type of one element of a difference list.
+ */
+typedef enum hwloc_topology_diff_type_e {
+ /*< \brief An object attribute was changed.
+ * The union is a hwloc_topology_diff_obj_attr_s.
+ */
+ HWLOC_TOPOLOGY_DIFF_OBJ_ATTR,
+
+ /*< \brief The difference is too complex,
+ * it cannot be represented. The difference below
+ * this object has not been checked.
+ * hwloc_topology_diff_build() will return 1.
+ *
+ * The union is a hwloc_topology_diff_too_complex_s.
+ */
+ HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX
+} hwloc_topology_diff_type_t;
+
+/** \brief One element of a difference list between two topologies.
+ */
+typedef union hwloc_topology_diff_u {
+ struct hwloc_topology_diff_generic_s {
+ /* each part of the union must start with these */
+ hwloc_topology_diff_type_t type;
+ union hwloc_topology_diff_u * next;
+ } generic;
+
+ /* A difference in an object attribute. */
+ struct hwloc_topology_diff_obj_attr_s {
+ hwloc_topology_diff_type_t type; /* must be HWLOC_TOPOLOGY_DIFF_OBJ_ATTR */
+ union hwloc_topology_diff_u * next;
+ /* List of attribute differences for a single object */
+ unsigned obj_depth;
+ unsigned obj_index;
+ union hwloc_topology_diff_obj_attr_u diff;
+ } obj_attr;
+
+ /* A difference that is too complex. */
+ struct hwloc_topology_diff_too_complex_s {
+ hwloc_topology_diff_type_t type; /* must be HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX */
+ union hwloc_topology_diff_u * next;
+ /* Where we had to stop computing the diff in the first topology */
+ unsigned obj_depth;
+ unsigned obj_index;
+ } too_complex;
+} * hwloc_topology_diff_t;
+
+
+/** \brief Compute the difference between 2 topologies.
+ *
+ * The difference is stored as a list of hwloc_topology_diff_t entries
+ * starting at \p diff.
+ * It is computed by doing a depth-first traversal of both topology trees
+ * simultaneously.
+ *
+ * If the difference between 2 objects is too complex to be represented
+ * (for instance if some objects are added or removed), a special diff
+ * entry of type HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX is queued.
+ * The computation of the diff does not continue under these objects.
+ * So each such diff entry means that the difference between two subtrees
+ * could not be computed.
+ *
+ * \return 0 if the difference can be represented properly.
+ *
+ * \return 0 with \p diff pointing NULL if there is no difference between
+ * the topologies.
+ *
+ * \return 1 if the difference is too complex (for instance if some objects are added
+ * or removed), some entries in the list will be of type HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX
+ * and 1 is returned.
+ *
+ * \return -1 on any other error.
+ *
+ * \note \p flags is currently not used. It should be 0.
+ *
+ * \note The output diff has to be freed with hwloc_topology_diff_destroy().
+ *
+ * \note The output diff can only be exported to XML or passed to
+ * hwloc_topology_diff_apply() if 0 was returned, i.e. if no entry of type
+ * HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX is listed.
+ *
+ * \note The output diff may be modified by removing some entries from
+ * the list. The removed entries should be freed by passing them as a list
+ * to hwloc_topology_diff_destroy().
+*/
+HWLOC_DECLSPEC int hwloc_topology_diff_build(hwloc_topology_t topology, hwloc_topology_t newtopology, unsigned long flags, hwloc_topology_diff_t *diff);
+
+/** \brief Flags to be given to hwloc_topology_diff_apply().
+ */
+enum hwloc_topology_diff_apply_flags_e {
+ /** \brief Apply topology diff in reverse direction.
+ * \hideinitializer
+ */
+ HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE = (1UL<<0)
+};
+
+/** \brief Apply a topology diff to an existing topology.
+ *
+ * \p flags is an OR'ed set of hwloc_topology_diff_apply_flags_e.
+ *
+ * The new topology is modified in place. hwloc_topology_dup()
+ * may be used to duplicate before patching.
+ *
+ * If the difference cannot be applied entirely, all previous applied
+ * portions are unapplied before returning.
+ *
+ * \return 0 on success.
+ *
+ * \return -N if applying the difference failed while trying
+ * to apply the N-th part of the difference. For instance -1
+ * is returned if the very first difference portion could not
+ * be applied.
+ */
+HWLOC_DECLSPEC int hwloc_topology_diff_apply(hwloc_topology_t topology, hwloc_topology_diff_t diff, unsigned long flags);
+
+/** \brief Destroy a list of topology differences.
+ *
+ * \note The \p topology parameter must be a valid topology
+ * but it is not required that it is related to \p diff.
+ */
+HWLOC_DECLSPEC int hwloc_topology_diff_destroy(hwloc_topology_t topology, hwloc_topology_diff_t diff);
+
+/** \brief Load a list of topology differences from a XML file.
+ *
+ * If not \c NULL, \p refname will be filled with the identifier
+ * string of the reference topology for the difference file,
+ * if any was specified in the XML file.
+ * This identifier is usually the name of the other XML file
+ * that contains the reference topology.
+ *
+ * \note The \p topology parameter must be a valid topology
+ * but it is not required that it is related to \p diff.
+ *
+ * \note the pointer returned in refname should later be freed
+ * by the caller.
+ */
+HWLOC_DECLSPEC int hwloc_topology_diff_load_xml(hwloc_topology_t topology, const char *xmlpath, hwloc_topology_diff_t *diff, char **refname);
+
+/** \brief Export a list of topology differences to a XML file.
+ *
+ * If not \c NULL, \p refname defines an identifier string
+ * for the reference topology which was used as a base when
+ * computing this difference.
+ * This identifier is usually the name of the other XML file
+ * that contains the reference topology.
+ * This attribute is given back when reading the diff from XML.
+ *
+ * \note The \p topology parameter must be a valid topology
+ * but it is not required that it is related to \p diff.
+ */
+HWLOC_DECLSPEC int hwloc_topology_diff_export_xml(hwloc_topology_t topology, hwloc_topology_diff_t diff, const char *refname, const char *xmlpath);
+
+/** \brief Load a list of topology differences from a XML buffer.
+ *
+ * If not \c NULL, \p refname will be filled with the identifier
+ * string of the reference topology for the difference file,
+ * if any was specified in the XML file.
+ * This identifier is usually the name of the other XML file
+ * that contains the reference topology.
+ *
+ * \note The \p topology parameter must be a valid topology
+ * but it is not required that it is related to \p diff.
+ *
+ * \note the pointer returned in refname should later be freed
+ * by the caller.
+ */
+HWLOC_DECLSPEC int hwloc_topology_diff_load_xmlbuffer(hwloc_topology_t topology, const char *xmlbuffer, int buflen, hwloc_topology_diff_t *diff, char **refname);
+
+/** \brief Export a list of topology differences to a XML buffer.
+ *
+ * If not \c NULL, \p refname defines an identifier string
+ * for the reference topology which was used as a base when
+ * computing this difference.
+ * This identifier is usually the name of the other XML file
+ * that contains the reference topology.
+ * This attribute is given back when reading the diff from XML.
+ *
+ * \note The XML buffer should later be freed with hwloc_free_xmlbuffer().
+ *
+ * \note The \p topology parameter must be a valid topology
+ * but it is not required that it is related to \p diff.
+ */
+HWLOC_DECLSPEC int hwloc_topology_diff_export_xmlbuffer(hwloc_topology_t topology, hwloc_topology_diff_t diff, const char *refname, char **xmlbuffer, int *buflen);
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_DIFF_H */
diff --git a/ext/hwloc/include/hwloc/gl.h b/ext/hwloc/include/hwloc/gl.h
new file mode 100644
index 000000000..4b8b3f230
--- /dev/null
+++ b/ext/hwloc/include/hwloc/gl.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright © 2012 Blue Brain Project, EPFL. All rights reserved.
+ * Copyright © 2012-2013 Inria. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and OpenGL displays.
+ *
+ * Applications that use both hwloc and OpenGL may want to include
+ * this file so as to get topology information for OpenGL displays.
+ */
+
+#ifndef HWLOC_GL_H
+#define HWLOC_GL_H
+
+#include <hwloc.h>
+
+#include <stdio.h>
+#include <string.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_gl Interoperability with OpenGL displays
+ *
+ * This interface offers ways to retrieve topology information about
+ * OpenGL displays.
+ *
+ * Only the NVIDIA display locality information is currently available,
+ * using the NV-CONTROL X11 extension and the NVCtrl library.
+ *
+ * @{
+ */
+
+/** \brief Get the hwloc OS device object corresponding to the
+ * OpenGL display given by port and device index.
+ *
+ * Return the OS device object describing the OpenGL display
+ * whose port (server) is \p port and device (screen) is \p device.
+ * Return NULL if there is none.
+ *
+ * The topology \p topology does not necessarily have to match the current
+ * machine. For instance the topology may be an XML import of a remote host.
+ * I/O devices detection and the GL component must be enabled in the topology.
+ *
+ * \note The corresponding PCI device object can be obtained by looking
+ * at the OS device parent object.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_gl_get_display_osdev_by_port_device(hwloc_topology_t topology,
+ unsigned port, unsigned device)
+{
+ unsigned x = (unsigned) -1, y = (unsigned) -1;
+ hwloc_obj_t osdev = NULL;
+ while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+ if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type
+ && osdev->name
+ && sscanf(osdev->name, ":%u.%u", &x, &y) == 2
+ && port == x && device == y)
+ return osdev;
+ }
+ errno = EINVAL;
+ return NULL;
+}
+
+/** \brief Get the hwloc OS device object corresponding to the
+ * OpenGL display given by name.
+ *
+ * Return the OS device object describing the OpenGL display
+ * whose name is \p name, built as ":port.device" such as ":0.0" .
+ * Return NULL if there is none.
+ *
+ * The topology \p topology does not necessarily have to match the current
+ * machine. For instance the topology may be an XML import of a remote host.
+ * I/O devices detection and the GL component must be enabled in the topology.
+ *
+ * \note The corresponding PCI device object can be obtained by looking
+ * at the OS device parent object.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_gl_get_display_osdev_by_name(hwloc_topology_t topology,
+ const char *name)
+{
+ hwloc_obj_t osdev = NULL;
+ while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+ if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type
+ && osdev->name
+ && !strcmp(name, osdev->name))
+ return osdev;
+ }
+ errno = EINVAL;
+ return NULL;
+}
+
+/** \brief Get the OpenGL display port and device corresponding
+ * to the given hwloc OS object.
+ *
+ * Return the OpenGL display port (server) in \p port and device (screen)
+ * in \p screen that correspond to the given hwloc OS device object.
+ * Return \c -1 if there is none.
+ *
+ * The topology \p topology does not necessarily have to match the current
+ * machine. For instance the topology may be an XML import of a remote host.
+ * I/O devices detection and the GL component must be enabled in the topology.
+ */
+static __hwloc_inline int
+hwloc_gl_get_display_by_osdev(hwloc_topology_t topology __hwloc_attribute_unused,
+ hwloc_obj_t osdev,
+ unsigned *port, unsigned *device)
+{
+ unsigned x = -1, y = -1;
+ if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type
+ && sscanf(osdev->name, ":%u.%u", &x, &y) == 2) {
+ *port = x;
+ *device = y;
+ return 0;
+ }
+ errno = EINVAL;
+ return -1;
+}
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_GL_H */
+
diff --git a/ext/hwloc/include/hwloc/glibc-sched.h b/ext/hwloc/include/hwloc/glibc-sched.h
new file mode 100644
index 000000000..58926ff11
--- /dev/null
+++ b/ext/hwloc/include/hwloc/glibc-sched.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2013 inria. All rights reserved.
+ * Copyright © 2009-2011 Université Bordeaux 1
+ * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and glibc scheduling routines.
+ *
+ * Applications that use both hwloc and glibc scheduling routines such as
+ * sched_getaffinity() or pthread_attr_setaffinity_np() may want to include
+ * this file so as to ease conversion between their respective types.
+ */
+
+#ifndef HWLOC_GLIBC_SCHED_H
+#define HWLOC_GLIBC_SCHED_H
+
+#include <hwloc.h>
+#include <hwloc/helper.h>
+#include <assert.h>
+
+#if !defined _GNU_SOURCE || !defined _SCHED_H || (!defined CPU_SETSIZE && !defined sched_priority)
+#error Please make sure to include sched.h before including glibc-sched.h, and define _GNU_SOURCE before any inclusion of sched.h
+#endif
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#ifdef HWLOC_HAVE_CPU_SET
+
+
+/** \defgroup hwlocality_glibc_sched Interoperability with glibc sched affinity
+ *
+ * This interface offers ways to convert between hwloc cpusets and glibc cpusets
+ * such as those manipulated by sched_getaffinity() or pthread_attr_setaffinity_np().
+ *
+ * \note Topology \p topology must match the current machine.
+ *
+ * @{
+ */
+
+
+/** \brief Convert hwloc CPU set \p toposet into glibc sched affinity CPU set \p schedset
+ *
+ * This function may be used before calling sched_setaffinity or any other function
+ * that takes a cpu_set_t as input parameter.
+ *
+ * \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC
+ */
+static __hwloc_inline int
+hwloc_cpuset_to_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t hwlocset,
+ cpu_set_t *schedset, size_t schedsetsize)
+{
+#ifdef CPU_ZERO_S
+ unsigned cpu;
+ CPU_ZERO_S(schedsetsize, schedset);
+ hwloc_bitmap_foreach_begin(cpu, hwlocset)
+ CPU_SET_S(cpu, schedsetsize, schedset);
+ hwloc_bitmap_foreach_end();
+#else /* !CPU_ZERO_S */
+ unsigned cpu;
+ CPU_ZERO(schedset);
+ assert(schedsetsize == sizeof(cpu_set_t));
+ hwloc_bitmap_foreach_begin(cpu, hwlocset)
+ CPU_SET(cpu, schedset);
+ hwloc_bitmap_foreach_end();
+#endif /* !CPU_ZERO_S */
+ return 0;
+}
+
+/** \brief Convert glibc sched affinity CPU set \p schedset into hwloc CPU set
+ *
+ * This function may be used before calling sched_setaffinity or any other function
+ * that takes a cpu_set_t as input parameter.
+ *
+ * \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC
+ */
+static __hwloc_inline int
+hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cpuset_t hwlocset,
+ const cpu_set_t *schedset, size_t schedsetsize)
+{
+ int cpu;
+#ifdef CPU_ZERO_S
+ int count;
+#endif
+ hwloc_bitmap_zero(hwlocset);
+#ifdef CPU_ZERO_S
+ count = CPU_COUNT_S(schedsetsize, schedset);
+ cpu = 0;
+ while (count) {
+ if (CPU_ISSET_S(cpu, schedsetsize, schedset)) {
+ hwloc_bitmap_set(hwlocset, cpu);
+ count--;
+ }
+ cpu++;
+ }
+#else /* !CPU_ZERO_S */
+ /* sched.h does not support dynamic cpu_set_t (introduced in glibc 2.7),
+ * assume we have a very old interface without CPU_COUNT (added in 2.6)
+ */
+ assert(schedsetsize == sizeof(cpu_set_t));
+  for(cpu=0; cpu<CPU_SETSIZE; cpu++)
+    if (CPU_ISSET(cpu, schedset))
+      hwloc_bitmap_set(hwlocset, cpu);
+#endif /* !CPU_ZERO_S */
+  return 0;
+}
+
+/** @} */
+
+#endif /* HWLOC_HAVE_CPU_SET */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_GLIBC_SCHED_H */
diff --git a/ext/hwloc/include/hwloc/helper.h b/ext/hwloc/include/hwloc/helper.h
new file mode 100644
--- /dev/null
+++ b/ext/hwloc/include/hwloc/helper.h
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2013 Inria.  All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux 1
+ * Copyright © 2009-2010 Cisco Systems, Inc.  All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief High-level hwloc traversal helpers.
+ */
+
+#ifndef HWLOC_HELPER_H
+#define HWLOC_HELPER_H
+
+#ifndef HWLOC_H
+#error Please include the main hwloc.h instead
+#endif
+
+#include <stdlib.h>
+#include <errno.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_helper_find_inside Finding Objects inside a CPU set
+ * @{
+ */
+
+/** \brief Get the first largest object included in the given cpuset \p set.
+ *
+ * \return the first object that is included in \p set and whose parent is not.
+ *
+ * This is convenient for iterating over all largest objects within a CPU set
+ * by doing a loop getting the first largest object and clearing its CPU set
+ * from the remaining CPU set.
+ *
+ * \note This function cannot work if the root object does not have a CPU set,
+ * e.g. if the topology is made of different machines.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_first_largest_obj_inside_cpuset(hwloc_topology_t topology, hwloc_const_cpuset_t set)
+{
+ hwloc_obj_t obj = hwloc_get_root_obj(topology);
+ if (!obj->cpuset || !hwloc_bitmap_intersects(obj->cpuset, set))
+ return NULL;
+ while (!hwloc_bitmap_isincluded(obj->cpuset, set)) {
+ /* while the object intersects without being included, look at its children */
+ hwloc_obj_t child = obj->first_child;
+ while (child) {
+ if (child->cpuset && hwloc_bitmap_intersects(child->cpuset, set))
+ break;
+ child = child->next_sibling;
+ }
+ if (!child)
+ /* no child intersects, return their father */
+ return obj;
+ /* found one intersecting child, look at its children */
+ obj = child;
+ }
+ /* obj is included, return it */
+ return obj;
+}
+
+/** \brief Get the set of largest objects covering exactly a given cpuset \p set
+ *
+ * \return the number of objects returned in \p objs.
+ *
+ * \note This function cannot work if the root object does not have a CPU set,
+ * e.g. if the topology is made of different machines.
+ */
+HWLOC_DECLSPEC int hwloc_get_largest_objs_inside_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+ hwloc_obj_t * __hwloc_restrict objs, int max);
+
+/** \brief Return the next object at depth \p depth included in CPU set \p set.
+ *
+ * If \p prev is \c NULL, return the first object at depth \p depth
+ * included in \p set. The next invocation should pass the previous
+ * return value in \p prev so as to obtain the next object in \p set.
+ *
+ * \note This function cannot work if objects at the given depth do
+ * not have CPU sets or if the topology is made of different machines.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+ unsigned depth, hwloc_obj_t prev)
+{
+ hwloc_obj_t next = hwloc_get_next_obj_by_depth(topology, depth, prev);
+ if (!next || !next->cpuset)
+ return NULL;
+ while (next && !hwloc_bitmap_isincluded(next->cpuset, set))
+ next = next->next_cousin;
+ return next;
+}
+
+/** \brief Return the next object of type \p type included in CPU set \p set.
+ *
+ * If there are multiple or no depth for given type, return \c NULL
+ * and let the caller fallback to
+ * hwloc_get_next_obj_inside_cpuset_by_depth().
+ *
+ * \note This function cannot work if objects of the given type do
+ * not have CPU sets or if the topology is made of different machines.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+ hwloc_obj_type_t type, hwloc_obj_t prev)
+{
+ int depth = hwloc_get_type_depth(topology, type);
+ if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE)
+ return NULL;
+ return hwloc_get_next_obj_inside_cpuset_by_depth(topology, set, depth, prev);
+}
+
+/** \brief Return the (logically) \p idx -th object at depth \p depth included in CPU set \p set.
+ *
+ * \note This function cannot work if objects at the given depth do
+ * not have CPU sets or if the topology is made of different machines.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+ unsigned depth, unsigned idx) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+ unsigned depth, unsigned idx)
+{
+ hwloc_obj_t obj = hwloc_get_obj_by_depth (topology, depth, 0);
+ unsigned count = 0;
+ if (!obj || !obj->cpuset)
+ return NULL;
+ while (obj) {
+ if (hwloc_bitmap_isincluded(obj->cpuset, set)) {
+ if (count == idx)
+ return obj;
+ count++;
+ }
+ obj = obj->next_cousin;
+ }
+ return NULL;
+}
+
+/** \brief Return the \p idx -th object of type \p type included in CPU set \p set.
+ *
+ * If there are multiple or no depth for given type, return \c NULL
+ * and let the caller fallback to
+ * hwloc_get_obj_inside_cpuset_by_depth().
+ *
+ * \note This function cannot work if objects of the given type do
+ * not have CPU sets or if the topology is made of different machines.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+ hwloc_obj_type_t type, unsigned idx) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+ hwloc_obj_type_t type, unsigned idx)
+{
+ int depth = hwloc_get_type_depth(topology, type);
+ if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE)
+ return NULL;
+ return hwloc_get_obj_inside_cpuset_by_depth(topology, set, depth, idx);
+}
+
+/** \brief Return the number of objects at depth \p depth included in CPU set \p set.
+ *
+ * \note This function cannot work if objects at the given depth do
+ * not have CPU sets or if the topology is made of different machines.
+ */
+static __hwloc_inline unsigned
+hwloc_get_nbobjs_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+ unsigned depth) __hwloc_attribute_pure;
+static __hwloc_inline unsigned
+hwloc_get_nbobjs_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+ unsigned depth)
+{
+ hwloc_obj_t obj = hwloc_get_obj_by_depth (topology, depth, 0);
+ unsigned count = 0;
+ if (!obj || !obj->cpuset)
+ return 0;
+ while (obj) {
+ if (hwloc_bitmap_isincluded(obj->cpuset, set))
+ count++;
+ obj = obj->next_cousin;
+ }
+ return count;
+}
+
+/** \brief Return the number of objects of type \p type included in CPU set \p set.
+ *
+ * If no object for that type exists inside CPU set \p set, 0 is
+ * returned. If there are several levels with objects of that type
+ * inside CPU set \p set, -1 is returned.
+ *
+ * \note This function cannot work if objects of the given type do
+ * not have CPU sets or if the topology is made of different machines.
+ */
+static __hwloc_inline int
+hwloc_get_nbobjs_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+ hwloc_obj_type_t type) __hwloc_attribute_pure;
+static __hwloc_inline int
+hwloc_get_nbobjs_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+ hwloc_obj_type_t type)
+{
+ int depth = hwloc_get_type_depth(topology, type);
+ if (depth == HWLOC_TYPE_DEPTH_UNKNOWN)
+ return 0;
+ if (depth == HWLOC_TYPE_DEPTH_MULTIPLE)
+    return -1; /* FIXME: aggregate nbobjs from different levels? */
+ return hwloc_get_nbobjs_inside_cpuset_by_depth(topology, set, depth);
+}
+
+/** \brief Return the logical index among the objects included in CPU set \p set.
+ *
+ * Consult all objects in the same level as \p obj and inside CPU set \p set
+ * in the logical order, and return the index of \p obj within them.
+ * If \p set covers the entire topology, this is the logical index of \p obj.
+ * Otherwise, this is similar to a logical index within the part of the topology
+ * defined by CPU set \p set.
+ */
+static __hwloc_inline int
+hwloc_get_obj_index_inside_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set,
+ hwloc_obj_t obj) __hwloc_attribute_pure;
+static __hwloc_inline int
+hwloc_get_obj_index_inside_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set,
+ hwloc_obj_t obj)
+{
+ int idx = 0;
+ if (!hwloc_bitmap_isincluded(obj->cpuset, set))
+ return -1;
+ /* count how many objects are inside the cpuset on the way from us to the beginning of the level */
+ while ((obj = obj->prev_cousin) != NULL)
+ if (hwloc_bitmap_isincluded(obj->cpuset, set))
+ idx++;
+ return idx;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_find_covering Finding Objects covering at least CPU set
+ * @{
+ */
+
+/** \brief Get the child covering at least CPU set \p set.
+ *
+ * \return \c NULL if no child matches or if \p set is empty.
+ *
+ * \note This function cannot work if parent does not have a CPU set.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_child_covering_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set,
+ hwloc_obj_t parent) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_child_covering_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set,
+ hwloc_obj_t parent)
+{
+ hwloc_obj_t child;
+ if (!parent->cpuset || hwloc_bitmap_iszero(set))
+ return NULL;
+ child = parent->first_child;
+ while (child) {
+ if (child->cpuset && hwloc_bitmap_isincluded(set, child->cpuset))
+ return child;
+ child = child->next_sibling;
+ }
+ return NULL;
+}
+
+/** \brief Get the lowest object covering at least CPU set \p set
+ *
+ * \return \c NULL if no object matches or if \p set is empty.
+ *
+ * \note This function cannot work if the root object does not have a CPU set,
+ * e.g. if the topology is made of different machines.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set)
+{
+ struct hwloc_obj *current = hwloc_get_root_obj(topology);
+ if (hwloc_bitmap_iszero(set) || !current->cpuset || !hwloc_bitmap_isincluded(set, current->cpuset))
+ return NULL;
+ while (1) {
+ hwloc_obj_t child = hwloc_get_child_covering_cpuset(topology, set, current);
+ if (!child)
+ return current;
+ current = child;
+ }
+}
+
+/** \brief Iterate through same-depth objects covering at least CPU set \p set
+ *
+ * If object \p prev is \c NULL, return the first object at depth \p
+ * depth covering at least part of CPU set \p set. The next
+ * invokation should pass the previous return value in \p prev so as
+ * to obtain the next object covering at least another part of \p set.
+ *
+ * \note This function cannot work if objects at the given depth do
+ * not have CPU sets or if the topology is made of different machines.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_covering_cpuset_by_depth(hwloc_topology_t topology, hwloc_const_cpuset_t set,
+ unsigned depth, hwloc_obj_t prev)
+{
+ hwloc_obj_t next = hwloc_get_next_obj_by_depth(topology, depth, prev);
+ if (!next || !next->cpuset)
+ return NULL;
+ while (next && !hwloc_bitmap_intersects(set, next->cpuset))
+ next = next->next_cousin;
+ return next;
+}
+
+/** \brief Iterate through same-type objects covering at least CPU set \p set
+ *
+ * If object \p prev is \c NULL, return the first object of type \p
+ * type covering at least part of CPU set \p set. The next invocation
+ * should pass the previous return value in \p prev so as to obtain
+ * the next object of type \p type covering at least another part of
+ * \p set.
+ *
+ * If there are no or multiple depths for type \p type, \c NULL is returned.
+ * The caller may fallback to hwloc_get_next_obj_covering_cpuset_by_depth()
+ * for each depth.
+ *
+ * \note This function cannot work if objects of the given type do
+ * not have CPU sets or if the topology is made of different machines.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_covering_cpuset_by_type(hwloc_topology_t topology, hwloc_const_cpuset_t set,
+ hwloc_obj_type_t type, hwloc_obj_t prev)
+{
+ int depth = hwloc_get_type_depth(topology, type);
+ if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE)
+ return NULL;
+ return hwloc_get_next_obj_covering_cpuset_by_depth(topology, set, depth, prev);
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_ancestors Looking at Ancestor and Child Objects
+ * @{
+ *
+ * Be sure to see the figure in \ref termsanddefs that shows a
+ * complete topology tree, including depths, child/sibling/cousin
+ * relationships, and an example of an asymmetric topology where one
+ * socket has fewer caches than its peers.
+ */
+
+/** \brief Returns the ancestor object of \p obj at depth \p depth. */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_ancestor_obj_by_depth (hwloc_topology_t topology __hwloc_attribute_unused, unsigned depth, hwloc_obj_t obj) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_ancestor_obj_by_depth (hwloc_topology_t topology __hwloc_attribute_unused, unsigned depth, hwloc_obj_t obj)
+{
+ hwloc_obj_t ancestor = obj;
+ if (obj->depth < depth)
+ return NULL;
+ while (ancestor && ancestor->depth > depth)
+ ancestor = ancestor->parent;
+ return ancestor;
+}
+
+/** \brief Returns the ancestor object of \p obj with type \p type. */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_ancestor_obj_by_type (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_type_t type, hwloc_obj_t obj) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_ancestor_obj_by_type (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_type_t type, hwloc_obj_t obj)
+{
+ hwloc_obj_t ancestor = obj->parent;
+ while (ancestor && ancestor->type != type)
+ ancestor = ancestor->parent;
+ return ancestor;
+}
+
+/** \brief Returns the common parent object to objects lvl1 and lvl2 */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_common_ancestor_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj1, hwloc_obj_t obj2) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_common_ancestor_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj1, hwloc_obj_t obj2)
+{
+ /* the loop isn't so easy since intermediate ancestors may have
+ * different depth, causing us to alternate between using obj1->parent
+ * and obj2->parent. Also, even if at some point we find ancestors of
+ * of the same depth, their ancestors may have different depth again.
+ */
+ while (obj1 != obj2) {
+ while (obj1->depth > obj2->depth)
+ obj1 = obj1->parent;
+ while (obj2->depth > obj1->depth)
+ obj2 = obj2->parent;
+ if (obj1 != obj2 && obj1->depth == obj2->depth) {
+ obj1 = obj1->parent;
+ obj2 = obj2->parent;
+ }
+ }
+ return obj1;
+}
+
+/** \brief Returns true if \p obj is inside the subtree beginning with ancestor object \p subtree_root.
+ *
+ * \note This function assumes that both \p obj and \p subtree_root have a \p cpuset.
+ */
+static __hwloc_inline int
+hwloc_obj_is_in_subtree (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj, hwloc_obj_t subtree_root) __hwloc_attribute_pure;
+static __hwloc_inline int
+hwloc_obj_is_in_subtree (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj, hwloc_obj_t subtree_root)
+{
+ return hwloc_bitmap_isincluded(obj->cpuset, subtree_root->cpuset);
+}
+
+/** \brief Return the next child.
+ *
+ * If \p prev is \c NULL, return the first child.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_child (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t parent, hwloc_obj_t prev)
+{
+ if (!prev)
+ return parent->first_child;
+ if (prev->parent != parent)
+ return NULL;
+ return prev->next_sibling;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_find_cache Looking at Cache Objects
+ * @{
+ */
+
+/** \brief Find the depth of cache objects matching cache depth and type.
+ *
+ * Return the depth of the topology level that contains cache objects
+ * whose attributes match \p cachedepth and \p cachetype. This function
+ * intends to disambiguate the case where hwloc_get_type_depth() returns
+ * \p HWLOC_TYPE_DEPTH_MULTIPLE.
+ *
+ * If no cache level matches, \p HWLOC_TYPE_DEPTH_UNKNOWN is returned.
+ *
+ * If \p cachetype is \p HWLOC_OBJ_CACHE_UNIFIED, the depth of the
+ * unique matching unified cache level is returned.
+ *
+ * If \p cachetype is \p HWLOC_OBJ_CACHE_DATA or \p HWLOC_OBJ_CACHE_INSTRUCTION,
+ * either a matching cache, or a unified cache is returned.
+ *
+ * If \p cachetype is \c -1, it is ignored and multiple levels may
+ * match. The function returns either the depth of a uniquely matching
+ * level or \p HWLOC_TYPE_DEPTH_MULTIPLE.
+ */
+static __hwloc_inline int
+hwloc_get_cache_type_depth (hwloc_topology_t topology,
+ unsigned cachelevel, hwloc_obj_cache_type_t cachetype)
+{
+ int depth;
+ int found = HWLOC_TYPE_DEPTH_UNKNOWN;
+ for (depth=0; ; depth++) {
+ hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, 0);
+ if (!obj)
+ break;
+ if (obj->type != HWLOC_OBJ_CACHE || obj->attr->cache.depth != cachelevel)
+ /* doesn't match, try next depth */
+ continue;
+ if (cachetype == (hwloc_obj_cache_type_t) -1) {
+ if (found != HWLOC_TYPE_DEPTH_UNKNOWN) {
+ /* second match, return MULTIPLE */
+ return HWLOC_TYPE_DEPTH_MULTIPLE;
+ }
+ /* first match, mark it as found */
+ found = depth;
+ continue;
+ }
+ if (obj->attr->cache.type == cachetype || obj->attr->cache.type == HWLOC_OBJ_CACHE_UNIFIED)
+ /* exact match (either unified is alone, or we match instruction or data), return immediately */
+ return depth;
+ }
+ /* went to the bottom, return what we found */
+ return found;
+}
+
+/** \brief Get the first cache covering a cpuset \p set
+ *
+ * \return \c NULL if no cache matches.
+ *
+ * \note This function cannot work if the root object does not have a CPU set,
+ * e.g. if the topology is made of different machines.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_cache_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_cache_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set)
+{
+ hwloc_obj_t current = hwloc_get_obj_covering_cpuset(topology, set);
+ while (current) {
+ if (current->type == HWLOC_OBJ_CACHE)
+ return current;
+ current = current->parent;
+ }
+ return NULL;
+}
+
+/** \brief Get the first cache shared between an object and somebody else.
+ *
+ * \return \c NULL if no cache matches or if an invalid object is given.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_shared_cache_covering_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_shared_cache_covering_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj)
+{
+ hwloc_obj_t current = obj->parent;
+ if (!obj->cpuset)
+ return NULL;
+ while (current && current->cpuset) {
+ if (!hwloc_bitmap_isequal(current->cpuset, obj->cpuset)
+ && current->type == HWLOC_OBJ_CACHE)
+ return current;
+ current = current->parent;
+ }
+ return NULL;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_find_misc Finding objects, miscellaneous helpers
+ * @{
+ *
+ * Be sure to see the figure in \ref termsanddefs that shows a
+ * complete topology tree, including depths, child/sibling/cousin
+ * relationships, and an example of an asymmetric topology where one
+ * socket has fewer caches than its peers.
+ */
+
+/** \brief Returns the object of type ::HWLOC_OBJ_PU with \p os_index.
+ *
+ * \note The \p os_index field of object should most of the times only be
+ * used for pretty-printing purpose. Type ::HWLOC_OBJ_PU is the only case
+ * where \p os_index could actually be useful, when manually binding to
+ * processors.
+ * However, using CPU sets to hide this complexity should often be preferred.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_pu_obj_by_os_index(hwloc_topology_t topology, unsigned os_index) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_pu_obj_by_os_index(hwloc_topology_t topology, unsigned os_index)
+{
+ hwloc_obj_t obj = NULL;
+ while ((obj = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PU, obj)) != NULL)
+ if (obj->os_index == os_index)
+ return obj;
+ return NULL;
+}
+
+/** \brief Do a depth-first traversal of the topology to find and sort
+ *
+ * all objects that are at the same depth than \p src.
+ * Report in \p objs up to \p max physically closest ones to \p src.
+ *
+ * \return the number of objects returned in \p objs.
+ *
+ * \return 0 if \p src is an I/O object.
+ *
+ * \note This function requires the \p src object to have a CPU set.
+ */
+/* TODO: rather provide an iterator? Provide a way to know how much should be allocated? By returning the total number of objects instead? */
+HWLOC_DECLSPEC unsigned hwloc_get_closest_objs (hwloc_topology_t topology, hwloc_obj_t src, hwloc_obj_t * __hwloc_restrict objs, unsigned max);
+
+/** \brief Find an object below another object, both specified by types and indexes.
+ *
+ * Start from the top system object and find object of type \p type1
+ * and logical index \p idx1. Then look below this object and find another
+ * object of type \p type2 and logical index \p idx2. Indexes are specified
+ * within the parent, not withing the entire system.
+ *
+ * For instance, if type1 is SOCKET, idx1 is 2, type2 is CORE and idx2
+ * is 3, return the fourth core object below the third socket.
+ *
+ * \note This function requires these objects to have a CPU set.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_below_by_type (hwloc_topology_t topology,
+ hwloc_obj_type_t type1, unsigned idx1,
+ hwloc_obj_type_t type2, unsigned idx2) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_below_by_type (hwloc_topology_t topology,
+ hwloc_obj_type_t type1, unsigned idx1,
+ hwloc_obj_type_t type2, unsigned idx2)
+{
+ hwloc_obj_t obj;
+ obj = hwloc_get_obj_by_type (topology, type1, idx1);
+ if (!obj || !obj->cpuset)
+ return NULL;
+ return hwloc_get_obj_inside_cpuset_by_type(topology, obj->cpuset, type2, idx2);
+}
+
+/** \brief Find an object below a chain of objects specified by types and indexes.
+ *
+ * This is a generalized version of hwloc_get_obj_below_by_type().
+ *
+ * Arrays \p typev and \p idxv must contain \p nr types and indexes.
+ *
+ * Start from the top system object and walk the arrays \p typev and \p idxv.
+ * For each type and logical index couple in the arrays, look under the previously found
+ * object to find the index-th object of the given type.
+ * Indexes are specified within the parent, not withing the entire system.
+ *
+ * For instance, if nr is 3, typev contains NODE, SOCKET and CORE,
+ * and idxv contains 0, 1 and 2, return the third core object below
+ * the second socket below the first NUMA node.
+ *
+ * \note This function requires all these objects and the root object
+ * to have a CPU set.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_below_array_by_type (hwloc_topology_t topology, int nr, hwloc_obj_type_t *typev, unsigned *idxv) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_below_array_by_type (hwloc_topology_t topology, int nr, hwloc_obj_type_t *typev, unsigned *idxv)
+{
+ hwloc_obj_t obj = hwloc_get_root_obj(topology);
+ int i;
+ for(i=0; i<nr; i++) {
+ if (!obj->cpuset)
+ return NULL;
+ obj = hwloc_get_obj_inside_cpuset_by_type(topology, obj->cpuset, typev[i], idxv[i]);
+ }
+ return obj;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_distribute Distributing items over a topology
+ * @{
+ */
+
+/** \brief Distribute \p n items over the topology under \p root
+ *
+ * Array \p cpuset will be filled with \p n cpusets recursively distributed
+ * linearly over the topology under \p root, down to depth \p until (which can
+ * be INT_MAX to distribute down to the finest level).
+ *
+ * This is typically useful when an application wants to distribute \p n
+ * threads over a machine, giving each of them as much private cache as
+ * possible and keeping them locally in number order.
+ *
+ * The caller may typically want to also call hwloc_bitmap_singlify()
+ * before binding a thread so that it does not move at all.
+ *
+ * \note This function requires the \p root object to have a CPU set.
+ */
+static __hwloc_inline void
+hwloc_distributev(hwloc_topology_t topology, hwloc_obj_t *root, unsigned n_roots, hwloc_cpuset_t *cpuset, unsigned n, unsigned until);
+static __hwloc_inline void
+hwloc_distribute(hwloc_topology_t topology, hwloc_obj_t root, hwloc_cpuset_t *set, unsigned n, unsigned until)
+{
+ unsigned i;
+ if (!root->arity || n == 1 || root->depth >= until) {
+ /* Got to the bottom, we can't split any more, put everything there. */
+ for (i=0; i<n; i++)
+ set[i] = hwloc_bitmap_dup(root->cpuset);
+ return;
+ }
+ hwloc_distributev(topology, root->children, root->arity, set, n, until);
+}
+
+/** \brief Distribute \p n items over the topology under \p roots
+ *
+ * This is the same as hwloc_distribute, but takes an array of roots instead of
+ * just one root.
+ *
+ * \note This function requires the \p roots objects to have a CPU set.
+ */
+static __hwloc_inline void
+hwloc_distributev(hwloc_topology_t topology, hwloc_obj_t *roots, unsigned n_roots, hwloc_cpuset_t *set, unsigned n, unsigned until)
+{
+ unsigned i;
+ unsigned tot_weight;
+ hwloc_cpuset_t *cpusetp = set;
+
+ tot_weight = 0;
+ for (i = 0; i < n_roots; i++)
+ if (roots[i]->cpuset)
+ tot_weight += hwloc_bitmap_weight(roots[i]->cpuset);
+
+ for (i = 0; i < n_roots && tot_weight; i++) {
+ /* Give to roots[i] a portion proportional to its weight */
+ unsigned weight = roots[i]->cpuset ? hwloc_bitmap_weight(roots[i]->cpuset) : 0;
+ unsigned chunk = (n * weight + tot_weight-1) / tot_weight;
+ hwloc_distribute(topology, roots[i], cpusetp, chunk, until);
+ cpusetp += chunk;
+ tot_weight -= weight;
+ n -= chunk;
+ }
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_topology_sets CPU and node sets of entire topologies
+ * @{
+ */
+/** \brief Get complete CPU set
+ *
+ * \return the complete CPU set of logical processors of the system. If the
+ * topology is the result of a combination of several systems, NULL is
+ * returned.
+ *
+ * \note The returned cpuset is not newly allocated and should thus not be
+ * changed or freed; hwloc_cpuset_dup must be used to obtain a local copy.
+ */
+static __hwloc_inline hwloc_const_cpuset_t
+hwloc_topology_get_complete_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_const_cpuset_t
+hwloc_topology_get_complete_cpuset(hwloc_topology_t topology)
+{
+ return hwloc_get_root_obj(topology)->complete_cpuset;
+}
+
+/** \brief Get topology CPU set
+ *
+ * \return the CPU set of logical processors of the system for which hwloc
+ * provides topology information. This is equivalent to the cpuset of the
+ * system object. If the topology is the result of a combination of several
+ * systems, NULL is returned.
+ *
+ * \note The returned cpuset is not newly allocated and should thus not be
+ * changed or freed; hwloc_cpuset_dup must be used to obtain a local copy.
+ */
+static __hwloc_inline hwloc_const_cpuset_t
+hwloc_topology_get_topology_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_const_cpuset_t
+hwloc_topology_get_topology_cpuset(hwloc_topology_t topology)
+{
+ return hwloc_get_root_obj(topology)->cpuset;
+}
+
+/** \brief Get online CPU set
+ *
+ * \return the CPU set of online logical processors of the system. If the
+ * topology is the result of a combination of several systems, NULL is
+ * returned.
+ *
+ * \note The returned cpuset is not newly allocated and should thus not be
+ * changed or freed; hwloc_cpuset_dup must be used to obtain a local copy.
+ */
+static __hwloc_inline hwloc_const_cpuset_t
+hwloc_topology_get_online_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_const_cpuset_t
+hwloc_topology_get_online_cpuset(hwloc_topology_t topology)
+{
+ return hwloc_get_root_obj(topology)->online_cpuset;
+}
+
+/** \brief Get allowed CPU set
+ *
+ * \return the CPU set of allowed logical processors of the system. If the
+ * topology is the result of a combination of several systems, NULL is
+ * returned.
+ *
+ * \note The returned cpuset is not newly allocated and should thus not be
+ * changed or freed, hwloc_cpuset_dup must be used to obtain a local copy.
+ */
+static __hwloc_inline hwloc_const_cpuset_t
+hwloc_topology_get_allowed_cpuset(hwloc_topology_t topology) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_const_cpuset_t
+hwloc_topology_get_allowed_cpuset(hwloc_topology_t topology)
+{
+ return hwloc_get_root_obj(topology)->allowed_cpuset;
+}
+
+/** \brief Get complete node set
+ *
+ * \return the complete node set of memory of the system. If the
+ * topology is the result of a combination of several systems, NULL is
+ * returned.
+ *
+ * \note The returned nodeset is not newly allocated and should thus not be
+ * changed or freed; hwloc_nodeset_dup must be used to obtain a local copy.
+ */
+static __hwloc_inline hwloc_const_nodeset_t
+hwloc_topology_get_complete_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_const_nodeset_t
+hwloc_topology_get_complete_nodeset(hwloc_topology_t topology)
+{
+ return hwloc_get_root_obj(topology)->complete_nodeset;
+}
+
+/** \brief Get topology node set
+ *
+ * \return the node set of memory of the system for which hwloc
+ * provides topology information. This is equivalent to the nodeset of the
+ * system object. If the topology is the result of a combination of several
+ * systems, NULL is returned.
+ *
+ * \note The returned nodeset is not newly allocated and should thus not be
+ * changed or freed; hwloc_nodeset_dup must be used to obtain a local copy.
+ */
+static __hwloc_inline hwloc_const_nodeset_t
+hwloc_topology_get_topology_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_const_nodeset_t
+hwloc_topology_get_topology_nodeset(hwloc_topology_t topology)
+{
+ return hwloc_get_root_obj(topology)->nodeset;
+}
+
+/** \brief Get allowed node set
+ *
+ * \return the node set of allowed memory of the system. If the
+ * topology is the result of a combination of several systems, NULL is
+ * returned.
+ *
+ * \note The returned nodeset is not newly allocated and should thus not be
+ * changed or freed, hwloc_nodeset_dup must be used to obtain a local copy.
+ */
+static __hwloc_inline hwloc_const_nodeset_t
+hwloc_topology_get_allowed_nodeset(hwloc_topology_t topology) __hwloc_attribute_pure;
+static __hwloc_inline hwloc_const_nodeset_t
+hwloc_topology_get_allowed_nodeset(hwloc_topology_t topology)
+{
+ return hwloc_get_root_obj(topology)->allowed_nodeset;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_nodeset_convert Converting between CPU sets and node sets
+ *
+ * There are two semantics for converting cpusets to nodesets depending on how
+ * non-NUMA machines are handled.
+ *
+ * When manipulating nodesets for memory binding, non-NUMA machines should be
+ * considered as having a single NUMA node. The standard conversion routines
+ * below should be used so that marking the first bit of the nodeset means
+ * that memory should be bound to a non-NUMA whole machine.
+ *
+ * When manipulating nodesets as an actual list of NUMA nodes without any
+ * need to handle memory binding on non-NUMA machines, the strict conversion
+ * routines may be used instead.
+ * @{
+ */
+
+/** \brief Convert a CPU set into a NUMA node set and handle non-NUMA cases
+ *
+ * If some NUMA nodes have no CPUs at all, this function never sets their
+ * indexes in the output node set, even if a full CPU set is given in input.
+ *
+ * If the topology contains no NUMA nodes, the machine is considered
+ * as a single memory node, and the following behavior is used:
+ * If \p cpuset is empty, \p nodeset will be emptied as well.
+ * Otherwise \p nodeset will be entirely filled.
+ */
+static __hwloc_inline void
+hwloc_cpuset_to_nodeset(hwloc_topology_t topology, hwloc_const_cpuset_t _cpuset, hwloc_nodeset_t nodeset)
+{
+ int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
+ hwloc_obj_t obj;
+
+ if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
+ if (hwloc_bitmap_iszero(_cpuset))
+ hwloc_bitmap_zero(nodeset);
+ else
+ /* Assume the whole system */
+ hwloc_bitmap_fill(nodeset);
+ return;
+ }
+
+ hwloc_bitmap_zero(nodeset);
+ obj = NULL;
+ while ((obj = hwloc_get_next_obj_covering_cpuset_by_depth(topology, _cpuset, depth, obj)) != NULL)
+ hwloc_bitmap_set(nodeset, obj->os_index);
+}
+
+/** \brief Convert a CPU set into a NUMA node set without handling non-NUMA cases
+ *
+ * This is the strict variant of ::hwloc_cpuset_to_nodeset. It does not fix
+ * non-NUMA cases. If the topology contains some NUMA nodes, behave exactly
+ * the same. However, if the topology contains no NUMA nodes, return an empty
+ * nodeset.
+ */
+static __hwloc_inline void
+hwloc_cpuset_to_nodeset_strict(struct hwloc_topology *topology, hwloc_const_cpuset_t _cpuset, hwloc_nodeset_t nodeset)
+{
+ int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
+ hwloc_obj_t obj;
+ if (depth == HWLOC_TYPE_DEPTH_UNKNOWN )
+ return;
+ hwloc_bitmap_zero(nodeset);
+ obj = NULL;
+ while ((obj = hwloc_get_next_obj_covering_cpuset_by_depth(topology, _cpuset, depth, obj)) != NULL)
+ hwloc_bitmap_set(nodeset, obj->os_index);
+}
+
+/** \brief Convert a NUMA node set into a CPU set and handle non-NUMA cases
+ *
+ * If the topology contains no NUMA nodes, the machine is considered
+ * as a single memory node, and the following behavior is used:
+ * If \p nodeset is empty, \p cpuset will be emptied as well.
+ * Otherwise \p cpuset will be entirely filled.
+ * This is useful for manipulating memory binding sets.
+ */
+static __hwloc_inline void
+hwloc_cpuset_from_nodeset(hwloc_topology_t topology, hwloc_cpuset_t _cpuset, hwloc_const_nodeset_t nodeset)
+{
+ int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
+ hwloc_obj_t obj;
+
+ if (depth == HWLOC_TYPE_DEPTH_UNKNOWN ) {
+ if (hwloc_bitmap_iszero(nodeset))
+ hwloc_bitmap_zero(_cpuset);
+ else
+ /* Assume the whole system */
+ hwloc_bitmap_fill(_cpuset);
+ return;
+ }
+
+ hwloc_bitmap_zero(_cpuset);
+ obj = NULL;
+ while ((obj = hwloc_get_next_obj_by_depth(topology, depth, obj)) != NULL) {
+ if (hwloc_bitmap_isset(nodeset, obj->os_index))
+ /* no need to check obj->cpuset because objects in levels always have a cpuset */
+ hwloc_bitmap_or(_cpuset, _cpuset, obj->cpuset);
+ }
+}
+
+/** \brief Convert a NUMA node set into a CPU set without handling non-NUMA cases
+ *
+ * This is the strict variant of ::hwloc_cpuset_from_nodeset. It does not fix
+ * non-NUMA cases. If the topology contains some NUMA nodes, behave exactly
+ * the same. However, if the topology contains no NUMA nodes, return an empty
+ * cpuset.
+ */
+static __hwloc_inline void
+hwloc_cpuset_from_nodeset_strict(struct hwloc_topology *topology, hwloc_cpuset_t _cpuset, hwloc_const_nodeset_t nodeset)
+{
+ int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
+ hwloc_obj_t obj;
+ if (depth == HWLOC_TYPE_DEPTH_UNKNOWN )
+ return;
+ hwloc_bitmap_zero(_cpuset);
+ obj = NULL;
+ while ((obj = hwloc_get_next_obj_by_depth(topology, depth, obj)) != NULL)
+ if (hwloc_bitmap_isset(nodeset, obj->os_index))
+ /* no need to check obj->cpuset because objects in levels always have a cpuset */
+ hwloc_bitmap_or(_cpuset, _cpuset, obj->cpuset);
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_distances Manipulating Distances
+ * @{
+ */
+
+/** \brief Get the distances between all objects at the given depth.
+ *
+ * \return a distances structure containing a matrix with all distances
+ * between all objects at the given depth.
+ *
+ * Slot i+nbobjs*j contains the distance from the object of logical index i
+ * the object of logical index j.
+ *
+ * \note This function only returns matrices covering the whole topology,
+ * without any unknown distance value. Those matrices are available in
+ * top-level object of the hierarchy. Matrices of lower objects are not
+ * reported here since they cover only part of the machine.
+ *
+ * The returned structure belongs to the hwloc library. The caller should
+ * not modify or free it.
+ *
+ * \return \c NULL if no such distance matrix exists.
+ */
+
+static __hwloc_inline const struct hwloc_distances_s *
+hwloc_get_whole_distance_matrix_by_depth(hwloc_topology_t topology, unsigned depth)
+{
+ hwloc_obj_t root = hwloc_get_root_obj(topology);
+ unsigned i;
+ for(i=0; i<root->distances_count; i++)
+ if (root->distances[i]->relative_depth == depth)
+ return root->distances[i];
+ return NULL;
+}
+
+/** \brief Get the distances between all objects of a given type.
+ *
+ * \return a distances structure containing a matrix with all distances
+ * between all objects of the given type.
+ *
+ * Slot i+nbobjs*j contains the distance from the object of logical index i
+ * the object of logical index j.
+ *
+ * \note This function only returns matrices covering the whole topology,
+ * without any unknown distance value. Those matrices are available in
+ * top-level object of the hierarchy. Matrices of lower objects are not
+ * reported here since they cover only part of the machine.
+ *
+ * The returned structure belongs to the hwloc library. The caller should
+ * not modify or free it.
+ *
+ * \return \c NULL if no such distance matrix exists.
+ */
+
+static __hwloc_inline const struct hwloc_distances_s *
+hwloc_get_whole_distance_matrix_by_type(hwloc_topology_t topology, hwloc_obj_type_t type)
+{
+ int depth = hwloc_get_type_depth(topology, type);
+ if (depth < 0)
+ return NULL;
+ return hwloc_get_whole_distance_matrix_by_depth(topology, depth);
+}
+
+/** \brief Get distances for the given depth and covering some objects
+ *
+ * Return a distance matrix that describes depth \p depth and covers at
+ * least object \p obj and all its children.
+ *
+ * When looking for the distance between some objects, a common ancestor should
+ * be passed in \p obj.
+ *
+ * \p firstp is set to logical index of the first object described by the matrix.
+ *
+ * The returned structure belongs to the hwloc library. The caller should
+ * not modify or free it.
+ */
+static __hwloc_inline const struct hwloc_distances_s *
+hwloc_get_distance_matrix_covering_obj_by_depth(hwloc_topology_t topology,
+ hwloc_obj_t obj, unsigned depth,
+ unsigned *firstp)
+{
+ while (obj && obj->cpuset) {
+ unsigned i;
+ for(i=0; i<obj->distances_count; i++)
+ if (obj->distances[i]->relative_depth == depth - obj->depth) {
+ if (!obj->distances[i]->nbobjs)
+ continue;
+ *firstp = hwloc_get_next_obj_inside_cpuset_by_depth(topology, obj->cpuset, depth, NULL)->logical_index;
+ return obj->distances[i];
+ }
+ obj = obj->parent;
+ }
+ return NULL;
+}
+
+/** \brief Get the latency in both directions between two objects.
+ *
+ * Look at ancestor objects from the bottom to the top until one of them
+ * contains a distance matrix that matches the objects exactly.
+ *
+ * \p latency gets the value from object \p obj1 to \p obj2, while
+ * \p reverse_latency gets the reverse-direction value, which
+ * may be different on some architectures.
+ *
+ * \return -1 if no ancestor contains a matching latency matrix.
+ */
+static __hwloc_inline int
+hwloc_get_latency(hwloc_topology_t topology,
+ hwloc_obj_t obj1, hwloc_obj_t obj2,
+ float *latency, float *reverse_latency)
+{
+ hwloc_obj_t ancestor;
+ const struct hwloc_distances_s * distances;
+ unsigned first_logical ;
+
+ if (obj1->depth != obj2->depth) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ ancestor = hwloc_get_common_ancestor_obj(topology, obj1, obj2);
+ distances = hwloc_get_distance_matrix_covering_obj_by_depth(topology, ancestor, obj1->depth, &first_logical);
+ if (distances && distances->latency) {
+ const float * latency_matrix = distances->latency;
+ unsigned nbobjs = distances->nbobjs;
+ unsigned l1 = obj1->logical_index - first_logical;
+ unsigned l2 = obj2->logical_index - first_logical;
+ *latency = latency_matrix[l1*nbobjs+l2];
+ *reverse_latency = latency_matrix[l2*nbobjs+l1];
+ return 0;
+ }
+
+ errno = ENOSYS;
+ return -1;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_advanced_io Finding I/O objects
+ * @{
+ */
+
+/** \brief Get the first non-I/O ancestor object.
+ *
+ * Given the I/O object \p ioobj, find the smallest non-I/O ancestor
+ * object. This regular object may then be used for binding because
+ * its locality is the same as \p ioobj.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_non_io_ancestor_obj(hwloc_topology_t topology __hwloc_attribute_unused,
+ hwloc_obj_t ioobj)
+{
+ hwloc_obj_t obj = ioobj;
+ while (obj && !obj->cpuset) {
+ obj = obj->parent;
+ }
+ return obj;
+}
+
+/** \brief Get the next PCI device in the system.
+ *
+ * \return the first PCI device if \p prev is \c NULL.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_pcidev(hwloc_topology_t topology, hwloc_obj_t prev)
+{
+ return hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PCI_DEVICE, prev);
+}
+
+/** \brief Find the PCI device object matching the PCI bus id
+ * given domain, bus device and function PCI bus id.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_pcidev_by_busid(hwloc_topology_t topology,
+ unsigned domain, unsigned bus, unsigned dev, unsigned func)
+{
+ hwloc_obj_t obj = NULL;
+ while ((obj = hwloc_get_next_pcidev(topology, obj)) != NULL) {
+ if (obj->attr->pcidev.domain == domain
+ && obj->attr->pcidev.bus == bus
+ && obj->attr->pcidev.dev == dev
+ && obj->attr->pcidev.func == func)
+ return obj;
+ }
+ return NULL;
+}
+
+/** \brief Find the PCI device object matching the PCI bus id
+ * given as a string xxxx:yy:zz.t or yy:zz.t.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_pcidev_by_busidstring(hwloc_topology_t topology, const char *busid)
+{
+ unsigned domain = 0; /* default */
+ unsigned bus, dev, func;
+
+ if (sscanf(busid, "%x:%x.%x", &bus, &dev, &func) != 3
+ && sscanf(busid, "%x:%x:%x.%x", &domain, &bus, &dev, &func) != 4) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ return hwloc_get_pcidev_by_busid(topology, domain, bus, dev, func);
+}
+
+/** \brief Get the next OS device in the system.
+ *
+ * \return the first OS device if \p prev is \c NULL.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_osdev(hwloc_topology_t topology, hwloc_obj_t prev)
+{
+ return hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_OS_DEVICE, prev);
+}
+
+/** \brief Get the next bridge in the system.
+ *
+ * \return the first bridge if \p prev is \c NULL.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_bridge(hwloc_topology_t topology, hwloc_obj_t prev)
+{
+ return hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_BRIDGE, prev);
+}
+
+/* \brief Checks whether a given bridge covers a given PCI bus.
+ */
+static __hwloc_inline int
+hwloc_bridge_covers_pcibus(hwloc_obj_t bridge,
+ unsigned domain, unsigned bus)
+{
+ return bridge->type == HWLOC_OBJ_BRIDGE
+ && bridge->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI
+ && bridge->attr->bridge.downstream.pci.domain == domain
+ && bridge->attr->bridge.downstream.pci.secondary_bus <= bus
+ && bridge->attr->bridge.downstream.pci.subordinate_bus >= bus;
+}
+
+/** \brief Find the hostbridge that covers the given PCI bus.
+ *
+ * This is useful for finding the locality of a bus because
+ * it is the hostbridge parent cpuset.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_hostbridge_by_pcibus(hwloc_topology_t topology,
+ unsigned domain, unsigned bus)
+{
+ hwloc_obj_t obj = NULL;
+ while ((obj = hwloc_get_next_bridge(topology, obj)) != NULL) {
+ if (hwloc_bridge_covers_pcibus(obj, domain, bus)) {
+ /* found bridge covering this pcibus, make sure it's a hostbridge */
+ assert(obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST);
+ assert(obj->parent->type != HWLOC_OBJ_BRIDGE);
+ assert(obj->parent->cpuset);
+ return obj;
+ }
+ }
+ return NULL;
+}
+
+/** @} */
+
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_HELPER_H */
diff --git a/ext/hwloc/include/hwloc/inlines.h b/ext/hwloc/include/hwloc/inlines.h
new file mode 100644
index 000000000..34d845c10
--- /dev/null
+++ b/ext/hwloc/include/hwloc/inlines.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2013 Inria. All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux 1
+ * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/**
+ * This file contains the inline code of functions declared in hwloc.h
+ */
+
+#ifndef HWLOC_INLINES_H
+#define HWLOC_INLINES_H
+
+#ifndef HWLOC_H
+#error Please include the main hwloc.h instead
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Return the depth of objects of type \p type; if that type has no own
+ * level, return the depth just below where such a level would have been
+ * inserted (the highest existing level whose type compares above \p type). */
+static __hwloc_inline int
+hwloc_get_type_or_below_depth (hwloc_topology_t topology, hwloc_obj_type_t type)
+{
+ int depth = hwloc_get_type_depth(topology, type);
+
+ if (depth != HWLOC_TYPE_DEPTH_UNKNOWN)
+ return depth;
+
+ /* find the highest existing level with type order >= */
+ for(depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU); ; depth--)
+ if (hwloc_compare_types(hwloc_get_depth_type(topology, depth), type) < 0)
+ return depth+1;
+
+ /* Shouldn't ever happen, as there is always a SYSTEM level with lower order and known depth. */
+ /* abort(); */
+}
+
+/* Return the depth of objects of type \p type; if that type has no own
+ * level, return the depth just above where such a level would have been
+ * inserted (the lowest existing level whose type compares below \p type). */
+static __hwloc_inline int
+hwloc_get_type_or_above_depth (hwloc_topology_t topology, hwloc_obj_type_t type)
+{
+ int depth = hwloc_get_type_depth(topology, type);
+
+ if (depth != HWLOC_TYPE_DEPTH_UNKNOWN)
+ return depth;
+
+ /* find the lowest existing level with type order <= */
+ for(depth = 0; ; depth++)
+ if (hwloc_compare_types(hwloc_get_depth_type(topology, depth), type) > 0)
+ return depth-1;
+
+ /* Shouldn't ever happen, as there is always a PU level with higher order and known depth. */
+ /* abort(); */
+}
+
+/* Return the number of objects of type \p type, 0 if no level has that
+ * type, or -1 if the type exists at multiple depths. */
+static __hwloc_inline int
+hwloc_get_nbobjs_by_type (hwloc_topology_t topology, hwloc_obj_type_t type)
+{
+ int depth = hwloc_get_type_depth(topology, type);
+ if (depth == HWLOC_TYPE_DEPTH_UNKNOWN)
+ return 0;
+ if (depth == HWLOC_TYPE_DEPTH_MULTIPLE)
+ return -1; /* FIXME: aggregate nbobjs from different levels? */
+ return hwloc_get_nbobjs_by_depth(topology, depth);
+}
+
+/* Return the \p idx -th object of type \p type, or NULL if the type has
+ * no level or exists at multiple depths. */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type, unsigned idx)
+{
+ int depth = hwloc_get_type_depth(topology, type);
+ if (depth == HWLOC_TYPE_DEPTH_UNKNOWN)
+ return NULL;
+ if (depth == HWLOC_TYPE_DEPTH_MULTIPLE)
+ return NULL;
+ return hwloc_get_obj_by_depth(topology, depth, idx);
+}
+
+/* Iterate objects at depth \p depth: return the first one if \p prev is
+ * NULL, \p prev's next cousin otherwise (NULL if \p prev is not at that depth). */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_by_depth (hwloc_topology_t topology, unsigned depth, hwloc_obj_t prev)
+{
+ if (!prev)
+ return hwloc_get_obj_by_depth (topology, depth, 0);
+ if (prev->depth != depth)
+ return NULL;
+ return prev->next_cousin;
+}
+
+/* By-type variant of the depth iterator: returns NULL when the type has
+ * no unique depth (unknown or multiple). */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type,
+ hwloc_obj_t prev)
+{
+ int depth = hwloc_get_type_depth(topology, type);
+ if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE)
+ return NULL;
+ return hwloc_get_next_obj_by_depth (topology, depth, prev);
+}
+
+/* The root of the topology is the single object at depth 0, index 0. */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_root_obj (hwloc_topology_t topology)
+{
+ return hwloc_get_obj_by_depth (topology, 0, 0);
+}
+
+/* Return the value of the first info pair of \p obj whose name matches
+ * \p name exactly, or NULL if there is none. */
+static __hwloc_inline const char *
+hwloc_obj_get_info_by_name(hwloc_obj_t obj, const char *name)
+{
+ unsigned i;
+ /* linear scan over the (name, value) pairs attached to the object;
+ * loop condition restored — the `<obj->` had been stripped from the
+ * source, leaving the non-compiling token `iinfos_count` */
+ for(i=0; i<obj->infos_count; i++)
+ if (!strcmp(obj->infos[i].name, name))
+ return obj->infos[i].value;
+ return NULL;
+}
+
+/* Allocate \p len bytes bound to \p nodeset with \p policy; if the OS
+ * cannot allocate-with-binding directly, fall back to changing the
+ * current membind policy and then allocating. */
+static __hwloc_inline void *
+hwloc_alloc_membind_policy_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+ void *p = hwloc_alloc_membind_nodeset(topology, len, nodeset, policy, flags);
+ if (p)
+ return p;
+ /* fallback: bind the current process/thread, then plain-allocate */
+ hwloc_set_membind_nodeset(topology, nodeset, policy, flags);
+ p = hwloc_alloc(topology, len);
+ if (p && policy != HWLOC_MEMBIND_FIRSTTOUCH)
+ /* Enforce the binding by touching the data */
+ memset(p, 0, len);
+ return p;
+}
+
+/* Same as hwloc_alloc_membind_policy_nodeset(), but the binding target
+ * is given as a cpuset. */
+static __hwloc_inline void *
+hwloc_alloc_membind_policy(hwloc_topology_t topology, size_t len, hwloc_const_cpuset_t set, hwloc_membind_policy_t policy, int flags)
+{
+ void *p = hwloc_alloc_membind(topology, len, set, policy, flags);
+ if (p)
+ return p;
+ /* fallback: bind the current process/thread, then plain-allocate */
+ hwloc_set_membind(topology, set, policy, flags);
+ p = hwloc_alloc(topology, len);
+ if (p && policy != HWLOC_MEMBIND_FIRSTTOUCH)
+ /* Enforce the binding by touching the data */
+ memset(p, 0, len);
+ return p;
+}
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_INLINES_H */
diff --git a/ext/hwloc/include/hwloc/intel-mic.h b/ext/hwloc/include/hwloc/intel-mic.h
new file mode 100644
index 000000000..d58237b3d
--- /dev/null
+++ b/ext/hwloc/include/hwloc/intel-mic.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright © 2013 Inria. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and Intel Xeon Phi (MIC).
+ *
+ * Applications that use both hwloc and Intel Xeon Phi (MIC) may want to
+ * include this file so as to get topology information for MIC devices.
+ */
+
+#ifndef HWLOC_INTEL_MIC_H
+#define HWLOC_INTEL_MIC_H
+
+#include <hwloc.h>
+#include <hwloc/autogen/config.h>
+#include <hwloc/helper.h>
+#ifdef HWLOC_LINUX_SYS
+#include <hwloc/linux.h>
+#include <dirent.h>
+#include <string.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_intel_mic Interoperability with Intel Xeon Phi (MIC)
+ *
+ * This interface offers ways to retrieve topology information about
+ * Intel Xeon Phi (MIC) devices.
+ *
+ * @{
+ */
+
+/** \brief Get the CPU set of logical processors that are physically
+ * close to MIC device whose index is \p idx.
+ *
+ * Return the CPU set describing the locality of the MIC device whose index is \p idx.
+ *
+ * Topology \p topology and device index \p idx must match the local machine.
+ * I/O devices detection is not needed in the topology.
+ *
+ * The function only returns the locality of the device.
+ * If more information about the device is needed, OS objects should
+ * be used instead, see hwloc_intel_mic_get_device_osdev_by_index().
+ *
+ * This function is currently only implemented in a meaningful way for
+ * Linux; other systems will simply get a full cpuset.
+ */
+static __hwloc_inline int
+hwloc_intel_mic_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
+ int idx __hwloc_attribute_unused,
+ hwloc_cpuset_t set)
+{
+#ifdef HWLOC_LINUX_SYS
+ /* If we're on Linux, use the sysfs mechanism to get the local cpus */
+#define HWLOC_INTEL_MIC_DEVICE_SYSFS_PATH_MAX 128
+ char path[HWLOC_INTEL_MIC_DEVICE_SYSFS_PATH_MAX];
+ DIR *sysdir = NULL;
+ FILE *sysfile = NULL;
+ struct dirent *dirent;
+ unsigned pcibus, pcidev, pcifunc;
+
+ /* sysfs of a different machine cannot be trusted */
+ if (!hwloc_topology_is_thissystem(topology)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ sprintf(path, "/sys/class/mic/mic%d", idx);
+ sysdir = opendir(path);
+ if (!sysdir)
+ return -1;
+
+ /* look for the "pci_BB:DD.F" symlink under the mic device directory */
+ while ((dirent = readdir(sysdir)) != NULL) {
+ if (sscanf(dirent->d_name, "pci_%02x:%02x.%02x", &pcibus, &pcidev, &pcifunc) == 3) {
+ sprintf(path, "/sys/class/mic/mic%d/pci_%02x:%02x.%02x/local_cpus", idx, pcibus, pcidev, pcifunc);
+ sysfile = fopen(path, "r");
+ if (!sysfile) {
+ closedir(sysdir);
+ return -1;
+ }
+
+ hwloc_linux_parse_cpumap_file(sysfile, set);
+ /* an empty cpumap means unknown locality: report the whole machine */
+ if (hwloc_bitmap_iszero(set))
+ hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+
+ fclose(sysfile);
+ break;
+ }
+ }
+
+ closedir(sysdir);
+#else
+ /* Non-Linux systems simply get a full cpuset */
+ hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+#endif
+ return 0;
+}
+
+/** \brief Get the hwloc OS device object corresponding to the
+ * MIC device for the given index.
+ *
+ * Return the OS device object describing the MIC device whose index is \p idx.
+ * Return NULL if there is none.
+ *
+ * The topology \p topology does not necessarily have to match the current
+ * machine. For instance the topology may be an XML import of a remote host.
+ * I/O devices detection must be enabled in the topology.
+ *
+ * \note The corresponding PCI device object can be obtained by looking
+ * at the OS device parent object.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_intel_mic_get_device_osdev_by_index(hwloc_topology_t topology,
+ unsigned idx)
+{
+ hwloc_obj_t osdev = NULL;
+ /* scan all OS devices for a coprocessor named "mic<idx>" */
+ while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+ if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type
+ && osdev->name
+ && !strncmp("mic", osdev->name, 3)
+ && atoi(osdev->name + 3) == (int) idx)
+ return osdev;
+ }
+ return NULL;
+}
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_INTEL_MIC_H */
diff --git a/ext/hwloc/include/hwloc/linux-libnuma.h b/ext/hwloc/include/hwloc/linux-libnuma.h
new file mode 100644
index 000000000..f74950437
--- /dev/null
+++ b/ext/hwloc/include/hwloc/linux-libnuma.h
@@ -0,0 +1,355 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2013 Inria. All rights reserved.
+ * Copyright © 2009-2010, 2012 Université Bordeaux 1
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and Linux libnuma.
+ *
+ * Applications that use both Linux libnuma and hwloc may want to
+ * include this file so as to ease conversion between their respective types.
+*/
+
+#ifndef HWLOC_LINUX_LIBNUMA_H
+#define HWLOC_LINUX_LIBNUMA_H
+
+#include <hwloc.h>
+#include <numa.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_linux_libnuma_ulongs Interoperability with Linux libnuma unsigned long masks
+ *
+ * This interface helps converting between Linux libnuma unsigned long masks
+ * and hwloc cpusets and nodesets.
+ *
+ * It also offers a consistent behavior on non-NUMA machines
+ * or non-NUMA-aware kernels by assuming that the machines have a single
+ * NUMA node.
+ *
+ * \note Topology \p topology must match the current machine.
+ *
+ * \note The behavior of libnuma is undefined if the kernel is not NUMA-aware.
+ * (when CONFIG_NUMA is not set in the kernel configuration).
+ * This helper and libnuma may thus not be strictly compatible in this case,
+ * which may be detected by checking whether numa_available() returns -1.
+ *
+ * @{
+ */
+
+
+/** \brief Convert hwloc CPU set \p cpuset into the array of unsigned long \p mask
+ *
+ * \p mask is the array of unsigned long that will be filled.
+ * \p maxnode contains the maximal node number that may be stored in \p mask.
+ * \p maxnode will be set to the maximal node number that was found, plus one.
+ *
+ * This function may be used before calling set_mempolicy, mbind, migrate_pages
+ * or any other function that takes an array of unsigned long and a maximal
+ * node number as input parameter.
+ */
+static __hwloc_inline int
+hwloc_cpuset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset,
+ unsigned long *mask, unsigned long *maxnode)
+{
+ int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
+ unsigned long outmaxnode = -1; /* sentinel: no node recorded yet */
+
+ /* round-up to the next ulong and clear all bytes */
+ *maxnode = (*maxnode + 8*sizeof(*mask) - 1) & ~(8*sizeof(*mask) - 1);
+ memset(mask, 0, *maxnode/8);
+
+ if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
+ hwloc_obj_t node = NULL;
+ /* set one bit per NUMA node intersecting the cpuset, track the highest index */
+ while ((node = hwloc_get_next_obj_covering_cpuset_by_depth(topology, cpuset, depth, node)) != NULL) {
+ if (node->os_index >= *maxnode)
+ continue;
+ mask[node->os_index/sizeof(*mask)/8] |= 1UL << (node->os_index % (sizeof(*mask)*8));
+ if (outmaxnode == (unsigned long) -1 || outmaxnode < node->os_index)
+ outmaxnode = node->os_index;
+ }
+
+ } else {
+ /* if no numa, libnuma assumes we have a single node */
+ if (!hwloc_bitmap_iszero(cpuset)) {
+ mask[0] = 1;
+ outmaxnode = 0;
+ }
+ }
+
+ *maxnode = outmaxnode+1;
+ return 0;
+}
+
+/** \brief Convert hwloc NUMA node set \p nodeset into the array of unsigned long \p mask
+ *
+ * \p mask is the array of unsigned long that will be filled.
+ * \p maxnode contains the maximal node number that may be stored in \p mask.
+ * \p maxnode will be set to the maximal node number that was found, plus one.
+ *
+ * This function may be used before calling set_mempolicy, mbind, migrate_pages
+ * or any other function that takes an array of unsigned long and a maximal
+ * node number as input parameter.
+ */
+static __hwloc_inline int
+hwloc_nodeset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset,
+ unsigned long *mask, unsigned long *maxnode)
+{
+ int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
+ unsigned long outmaxnode = -1; /* sentinel: no node recorded yet */
+
+ /* round-up to the next ulong and clear all bytes */
+ *maxnode = (*maxnode + 8*sizeof(*mask) - 1) & ~(8*sizeof(*mask) - 1);
+ memset(mask, 0, *maxnode/8);
+
+ if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
+ hwloc_obj_t node = NULL;
+ /* same as the cpuset variant, but filter nodes through the nodeset */
+ while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) {
+ if (node->os_index >= *maxnode)
+ continue;
+ if (!hwloc_bitmap_isset(nodeset, node->os_index))
+ continue;
+ mask[node->os_index/sizeof(*mask)/8] |= 1UL << (node->os_index % (sizeof(*mask)*8));
+ if (outmaxnode == (unsigned long) -1 || outmaxnode < node->os_index)
+ outmaxnode = node->os_index;
+ }
+
+ } else {
+ /* if no numa, libnuma assumes we have a single node */
+ if (!hwloc_bitmap_iszero(nodeset)) {
+ mask[0] = 1;
+ outmaxnode = 0;
+ }
+ }
+
+ *maxnode = outmaxnode+1;
+ return 0;
+}
+
+/** \brief Convert the array of unsigned long \p mask into hwloc CPU set
+ *
+ * \p mask is a array of unsigned long that will be read.
+ * \p maxnode contains the maximal node number that may be read in \p mask.
+ *
+ * This function may be used after calling get_mempolicy or any other function
+ * that takes an array of unsigned long as output parameter (and possibly
+ * a maximal node number as input parameter).
+ */
+static __hwloc_inline int
+hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t cpuset,
+ const unsigned long *mask, unsigned long maxnode)
+{
+ int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
+
+ if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
+ hwloc_obj_t node = NULL;
+ hwloc_bitmap_zero(cpuset);
+ /* OR together the cpusets of every node whose bit is set in the mask */
+ while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
+ if (node->os_index < maxnode
+ && (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8)))))
+ hwloc_bitmap_or(cpuset, cpuset, node->cpuset);
+ } else {
+ /* if no numa, libnuma assumes we have a single node */
+ if (mask[0] & 1)
+ hwloc_bitmap_copy(cpuset, hwloc_topology_get_complete_cpuset(topology));
+ else
+ hwloc_bitmap_zero(cpuset);
+ }
+
+ return 0;
+}
+
+/** \brief Convert the array of unsigned long \p mask into hwloc NUMA node set
+ *
+ * \p mask is a array of unsigned long that will be read.
+ * \p maxnode contains the maximal node number that may be read in \p mask.
+ *
+ * This function may be used after calling get_mempolicy or any other function
+ * that takes an array of unsigned long as output parameter (and possibly
+ * a maximal node number as input parameter).
+ */
+static __hwloc_inline int
+hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset_t nodeset,
+ const unsigned long *mask, unsigned long maxnode)
+{
+ int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
+
+ if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
+ hwloc_obj_t node = NULL;
+ hwloc_bitmap_zero(nodeset);
+ /* copy each set mask bit into the nodeset, indexed by node os_index */
+ while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
+ if (node->os_index < maxnode
+ && (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8)))))
+ hwloc_bitmap_set(nodeset, node->os_index);
+ } else {
+ /* if no numa, libnuma assumes we have a single node */
+ if (mask[0] & 1)
+ hwloc_bitmap_fill(nodeset);
+ else
+ hwloc_bitmap_zero(nodeset);
+ }
+
+ return 0;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_linux_libnuma_bitmask Interoperability with Linux libnuma bitmask
+ *
+ * This interface helps converting between Linux libnuma bitmasks
+ * and hwloc cpusets and nodesets.
+ *
+ * It also offers a consistent behavior on non-NUMA machines
+ * or non-NUMA-aware kernels by assuming that the machines have a single
+ * NUMA node.
+ *
+ * \note Topology \p topology must match the current machine.
+ *
+ * \note The behavior of libnuma is undefined if the kernel is not NUMA-aware.
+ * (when CONFIG_NUMA is not set in the kernel configuration).
+ * This helper and libnuma may thus not be strictly compatible in this case,
+ * which may be detected by checking whether numa_available() returns -1.
+ *
+ * @{
+ */
+
+
+/** \brief Convert hwloc CPU set \p cpuset into the returned libnuma bitmask
+ *
+ * The returned bitmask should later be freed with numa_bitmask_free.
+ *
+ * This function may be used before calling many numa_ functions
+ * that use a struct bitmask as an input parameter.
+ *
+ * \return newly allocated struct bitmask.
+ */
+/* prototype declared separately so it can carry the malloc attribute */
+static __hwloc_inline struct bitmask *
+hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset) __hwloc_attribute_malloc;
+static __hwloc_inline struct bitmask *
+hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset)
+{
+ int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
+ struct bitmask *bitmask = numa_allocate_cpumask();
+ if (!bitmask)
+ return NULL;
+
+ if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
+ hwloc_obj_t node = NULL;
+ /* only report nodes that actually have local memory */
+ while ((node = hwloc_get_next_obj_covering_cpuset_by_depth(topology, cpuset, depth, node)) != NULL)
+ if (node->memory.local_memory)
+ numa_bitmask_setbit(bitmask, node->os_index);
+ } else {
+ /* if no numa, libnuma assumes we have a single node */
+ if (!hwloc_bitmap_iszero(cpuset))
+ numa_bitmask_setbit(bitmask, 0);
+ }
+
+ return bitmask;
+}
+
+/** \brief Convert hwloc NUMA node set \p nodeset into the returned libnuma bitmask
+ *
+ * The returned bitmask should later be freed with numa_bitmask_free.
+ *
+ * This function may be used before calling many numa_ functions
+ * that use a struct bitmask as an input parameter.
+ *
+ * \return newly allocated struct bitmask.
+ */
+/* prototype declared separately so it can carry the malloc attribute */
+static __hwloc_inline struct bitmask *
+hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset) __hwloc_attribute_malloc;
+static __hwloc_inline struct bitmask *
+hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset)
+{
+ int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
+ struct bitmask *bitmask = numa_allocate_cpumask();
+ if (!bitmask)
+ return NULL;
+
+ if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
+ hwloc_obj_t node = NULL;
+ /* only report nodes in the nodeset that actually have local memory */
+ while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
+ if (hwloc_bitmap_isset(nodeset, node->os_index) && node->memory.local_memory)
+ numa_bitmask_setbit(bitmask, node->os_index);
+ } else {
+ /* if no numa, libnuma assumes we have a single node */
+ if (!hwloc_bitmap_iszero(nodeset))
+ numa_bitmask_setbit(bitmask, 0);
+ }
+
+ return bitmask;
+}
+
+/** \brief Convert libnuma bitmask \p bitmask into hwloc CPU set \p cpuset
+ *
+ * This function may be used after calling many numa_ functions
+ * that use a struct bitmask as an output parameter.
+ */
+static __hwloc_inline int
+hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_t cpuset,
+ const struct bitmask *bitmask)
+{
+ int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
+
+ if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
+ hwloc_obj_t node = NULL;
+ hwloc_bitmap_zero(cpuset);
+ /* OR together the cpusets of every node set in the libnuma bitmask */
+ while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
+ if (numa_bitmask_isbitset(bitmask, node->os_index))
+ hwloc_bitmap_or(cpuset, cpuset, node->cpuset);
+ } else {
+ /* if no numa, libnuma assumes we have a single node */
+ if (numa_bitmask_isbitset(bitmask, 0))
+ hwloc_bitmap_copy(cpuset, hwloc_topology_get_complete_cpuset(topology));
+ else
+ hwloc_bitmap_zero(cpuset);
+ }
+
+ return 0;
+}
+
+/** \brief Convert libnuma bitmask \p bitmask into hwloc NUMA node set \p nodeset
+ *
+ * This function may be used after calling many numa_ functions
+ * that use a struct bitmask as an output parameter.
+ */
+static __hwloc_inline int
+hwloc_nodeset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_nodeset_t nodeset,
+ const struct bitmask *bitmask)
+{
+ int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
+
+ if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
+ hwloc_obj_t node = NULL;
+ hwloc_bitmap_zero(nodeset);
+ /* copy each set bitmask bit into the nodeset, indexed by node os_index */
+ while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
+ if (numa_bitmask_isbitset(bitmask, node->os_index))
+ hwloc_bitmap_set(nodeset, node->os_index);
+ } else {
+ /* if no numa, libnuma assumes we have a single node */
+ if (numa_bitmask_isbitset(bitmask, 0))
+ hwloc_bitmap_fill(nodeset);
+ else
+ hwloc_bitmap_zero(nodeset);
+ }
+
+ return 0;
+}
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_LINUX_LIBNUMA_H */
diff --git a/ext/hwloc/include/hwloc/linux.h b/ext/hwloc/include/hwloc/linux.h
new file mode 100644
index 000000000..1df904651
--- /dev/null
+++ b/ext/hwloc/include/hwloc/linux.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2013 Inria. All rights reserved.
+ * Copyright © 2009-2011 Université Bordeaux 1
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and Linux.
+ *
+ * Applications that use hwloc on Linux may want to include this file
+ * if using some low-level Linux features.
+ */
+
+#ifndef HWLOC_LINUX_H
+#define HWLOC_LINUX_H
+
+#include <hwloc.h>
+#include <stdio.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_linux Linux-specific helpers
+ *
+ * This includes helpers for manipulating Linux kernel cpumap files, and hwloc
+ * equivalents of the Linux sched_setaffinity and sched_getaffinity system calls.
+ *
+ * @{
+ */
+
+/** \brief Convert a linux kernel cpumap file \p file into hwloc CPU set.
+ *
+ * Might be used when reading CPU set from sysfs attributes such as topology
+ * and caches for processors, or local_cpus for devices.
+ */
+HWLOC_DECLSPEC int hwloc_linux_parse_cpumap_file(FILE *file, hwloc_cpuset_t set);
+
+/** \brief Bind a thread \p tid on cpus given in cpuset \p set
+ *
+ * The behavior is exactly the same as the Linux sched_setaffinity system call,
+ * but uses a hwloc cpuset.
+ *
+ * \note This is equivalent to calling hwloc_set_proc_cpubind() with
+ * HWLOC_CPUBIND_THREAD as flags.
+ */
+HWLOC_DECLSPEC int hwloc_linux_set_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_const_cpuset_t set);
+
+/** \brief Get the current binding of thread \p tid
+ *
+ * The behavior is exactly the same as the Linux sched_getaffinity system call,
+ * but uses a hwloc cpuset.
+ *
+ * \note This is equivalent to calling hwloc_get_proc_cpubind() with
+ * HWLOC_CPUBIND_THREAD as flags.
+ */
+HWLOC_DECLSPEC int hwloc_linux_get_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_cpuset_t set);
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_LINUX_H */
diff --git a/ext/hwloc/include/hwloc/myriexpress.h b/ext/hwloc/include/hwloc/myriexpress.h
new file mode 100644
index 000000000..ac751bcfb
--- /dev/null
+++ b/ext/hwloc/include/hwloc/myriexpress.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright © 2010-2013 Inria. All rights reserved.
+ * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and Myrinet Express.
+ *
+ * Applications that use both hwloc and Myrinet Express verbs may want to
+ * include this file so as to get topology information for Myrinet hardware.
+ *
+ */
+
+#ifndef HWLOC_MYRIEXPRESS_H
+#define HWLOC_MYRIEXPRESS_H
+
+#include <hwloc.h>
+#include <hwloc/autogen/config.h>
+
+#include <myriexpress.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_myriexpress Interoperability with Myrinet Express
+ *
+ * This interface offers ways to retrieve topology information about
+ * Myrinet Express hardware.
+ *
+ * @{
+ */
+
+/** \brief Get the CPU set of logical processors that are physically
+ * close the MX board \p id.
+ *
+ * Return the CPU set describing the locality of the Myrinet Express
+ * board whose index is \p id.
+ *
+ * Topology \p topology and device \p id must match the local machine.
+ * I/O devices detection is not needed in the topology.
+ *
+ * The function only returns the locality of the device.
+ * No additional information about the device is available.
+ */
+static __hwloc_inline int
+hwloc_mx_board_get_device_cpuset(hwloc_topology_t topology,
+ unsigned id, hwloc_cpuset_t set)
+{
+ uint32_t in, out;
+
+ /* the MX driver only knows about the local machine */
+ if (!hwloc_topology_is_thissystem(topology)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* ask the MX driver for the NUMA node near board \p id */
+ in = id;
+ if (mx_get_info(NULL, MX_NUMA_NODE, &in, sizeof(in), &out, sizeof(out)) != MX_SUCCESS) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* (uint32_t)-1 means the driver does not know the locality */
+ if (out != (uint32_t) -1) {
+ hwloc_obj_t obj = NULL;
+ while ((obj = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NODE, obj)) != NULL)
+ if (obj->os_index == out) {
+ hwloc_bitmap_copy(set, obj->cpuset);
+ goto out;
+ }
+ }
+ /* fallback to the full topology cpuset */
+ hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+
+ out:
+ return 0;
+}
+
+/** \brief Get the CPU set of logical processors that are physically
+ * close the MX endpoint \p endpoint.
+ *
+ * Return the CPU set describing the locality of the Myrinet Express
+ * board that runs the MX endpoint \p endpoint.
+ *
+ * Topology \p topology and device \p id must match the local machine.
+ * I/O devices detection is not needed in the topology.
+ *
+ * The function only returns the locality of the endpoint.
+ * No additional information about the endpoint or device is available.
+ */
+static __hwloc_inline int
+hwloc_mx_endpoint_get_device_cpuset(hwloc_topology_t topology,
+ mx_endpoint_t endpoint, hwloc_cpuset_t set)
+{
+ uint64_t nid;
+ uint32_t nindex, eid;
+ mx_endpoint_addr_t eaddr;
+
+ /* endpoint -> address -> NIC id -> board number, then reuse the board helper */
+ if (mx_get_endpoint_addr(endpoint, &eaddr) != MX_SUCCESS) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (mx_decompose_endpoint_addr(eaddr, &nid, &eid) != MX_SUCCESS) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (mx_nic_id_to_board_number(nid, &nindex) != MX_SUCCESS) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ return hwloc_mx_board_get_device_cpuset(topology, nindex, set);
+}
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_MYRIEXPRESS_H */
diff --git a/ext/hwloc/include/hwloc/nvml.h b/ext/hwloc/include/hwloc/nvml.h
new file mode 100644
index 000000000..462b33266
--- /dev/null
+++ b/ext/hwloc/include/hwloc/nvml.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright © 2012-2013 Inria. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and the NVIDIA Management Library.
+ *
+ * Applications that use both hwloc and the NVIDIA Management Library may want to
+ * include this file so as to get topology information for NVML devices.
+ */
+
+#ifndef HWLOC_NVML_H
+#define HWLOC_NVML_H
+
+#include <hwloc.h>
+#include <hwloc/autogen/config.h>
+#include <hwloc/helper.h>
+#ifdef HWLOC_LINUX_SYS
+#include <hwloc/linux.h>
+#endif
+
+#include <nvml.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_nvml Interoperability with the NVIDIA Management Library
+ *
+ * This interface offers ways to retrieve topology information about
+ * devices managed by the NVIDIA Management Library (NVML).
+ *
+ * @{
+ */
+
+/** \brief Get the CPU set of logical processors that are physically
+ * close to NVML device \p device.
+ *
+ * Return the CPU set describing the locality of the NVML device \p device.
+ *
+ * Topology \p topology and device \p device must match the local machine.
+ * I/O devices detection and the NVML component are not needed in the topology.
+ *
+ * The function only returns the locality of the device.
+ * If more information about the device is needed, OS objects should
+ * be used instead, see hwloc_nvml_get_device_osdev()
+ * and hwloc_nvml_get_device_osdev_by_index().
+ *
+ * This function is currently only implemented in a meaningful way for
+ * Linux; other systems will simply get a full cpuset.
+ */
+static __hwloc_inline int
+hwloc_nvml_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
+ nvmlDevice_t device, hwloc_cpuset_t set)
+{
+#ifdef HWLOC_LINUX_SYS
+ /* If we're on Linux, use the sysfs mechanism to get the local cpus */
+#define HWLOC_NVML_DEVICE_SYSFS_PATH_MAX 128
+ char path[HWLOC_NVML_DEVICE_SYSFS_PATH_MAX];
+ FILE *sysfile = NULL;
+ nvmlReturn_t nvres;
+ nvmlPciInfo_t pci;
+
+ /* sysfs of a different machine cannot be trusted */
+ if (!hwloc_topology_is_thissystem(topology)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* locate the device on the PCI bus, then read its local_cpus attribute */
+ nvres = nvmlDeviceGetPciInfo(device, &pci);
+ if (NVML_SUCCESS != nvres) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", pci.domain, pci.bus, pci.device);
+ sysfile = fopen(path, "r");
+ if (!sysfile)
+ return -1;
+
+ hwloc_linux_parse_cpumap_file(sysfile, set);
+ /* an empty cpumap means unknown locality: report the whole machine */
+ if (hwloc_bitmap_iszero(set))
+ hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+
+ fclose(sysfile);
+#else
+ /* Non-Linux systems simply get a full cpuset */
+ hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+#endif
+ return 0;
+}
+
+/** \brief Get the hwloc OS device object corresponding to the
+ * NVML device whose index is \p idx.
+ *
+ * Return the OS device object describing the NVML device whose
+ * index is \p idx. Returns NULL if there is none.
+ *
+ * The topology \p topology does not necessarily have to match the current
+ * machine. For instance the topology may be an XML import of a remote host.
+ * I/O devices detection and the NVML component must be enabled in the topology.
+ *
+ * \note The corresponding PCI device object can be obtained by looking
+ * at the OS device parent object.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_nvml_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx)
+{
+ hwloc_obj_t osdev = NULL;
+ /* scan all OS devices for a GPU named "nvml<idx>" */
+ while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+ if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type
+ && osdev->name
+ && !strncmp("nvml", osdev->name, 4)
+ && atoi(osdev->name + 4) == (int) idx)
+ return osdev;
+ }
+ return NULL;
+}
+
+/** \brief Get the hwloc OS device object corresponding to NVML device \p device.
+ *
+ * Return the hwloc OS device object that describes the given
+ * NVML device \p device. Return NULL if there is none.
+ *
+ * Topology \p topology and device \p device must match the local machine.
+ * I/O devices detection and the NVML component must be enabled in the topology.
+ * If not, the locality of the object may still be found using
+ * hwloc_nvml_get_device_cpuset().
+ *
+ * \note The corresponding hwloc PCI device may be found by looking
+ * at the result parent pointer.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_nvml_get_device_osdev(hwloc_topology_t topology, nvmlDevice_t device)
+{
+ hwloc_obj_t osdev;
+ nvmlReturn_t nvres;
+ nvmlPciInfo_t pci;
+
+ /* the PCI info of \p device only makes sense on the local machine */
+ if (!hwloc_topology_is_thissystem(topology)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ nvres = nvmlDeviceGetPciInfo(device, &pci);
+ if (NVML_SUCCESS != nvres)
+ return NULL;
+
+ /* match the device's PCI location against the parent of each "nvml*" OS device */
+ osdev = NULL;
+ while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+ hwloc_obj_t pcidev = osdev->parent;
+ /* guard against unnamed OS devices before strncmp, like
+ * hwloc_nvml_get_device_osdev_by_index() does */
+ if (!osdev->name || strncmp(osdev->name, "nvml", 4))
+ continue;
+ if (pcidev
+ && pcidev->type == HWLOC_OBJ_PCI_DEVICE
+ && pcidev->attr->pcidev.domain == pci.domain
+ && pcidev->attr->pcidev.bus == pci.bus
+ && pcidev->attr->pcidev.dev == pci.device
+ && pcidev->attr->pcidev.func == 0)
+ return osdev;
+ }
+
+ return NULL;
+}
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_NVML_H */
diff --git a/ext/hwloc/include/hwloc/opencl.h b/ext/hwloc/include/hwloc/opencl.h
new file mode 100644
index 000000000..00c97580b
--- /dev/null
+++ b/ext/hwloc/include/hwloc/opencl.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright © 2012-2013 Inria. All rights reserved.
+ * Copyright © 2013 Université Bordeaux 1. All right reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and the OpenCL interface.
+ *
+ * Applications that use both hwloc and OpenCL may want to
+ * include this file so as to get topology information for OpenCL devices.
+ */
+
+#ifndef HWLOC_OPENCL_H
+#define HWLOC_OPENCL_H
+
+#include <hwloc.h>
+#include <hwloc/autogen/config.h>
+#include <hwloc/helper.h>
+#ifdef HWLOC_LINUX_SYS
+#include <hwloc/linux.h>
+#endif
+
+#include <CL/cl.h>
+#include <CL/cl_ext.h>
+
+#include <stdio.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_opencl Interoperability with OpenCL
+ *
+ * This interface offers ways to retrieve topology information about
+ * OpenCL devices.
+ *
+ * Only the AMD OpenCL interface currently offers useful locality information
+ * about its devices.
+ *
+ * @{
+ */
+
+/** \brief Get the CPU set of logical processors that are physically
+ * close to OpenCL device \p device.
+ *
+ * Return the CPU set describing the locality of the OpenCL device \p device.
+ *
+ * Topology \p topology and device \p device must match the local machine.
+ * I/O devices detection and the OpenCL component are not needed in the topology.
+ *
+ * The function only returns the locality of the device.
+ * If more information about the device is needed, OS objects should
+ * be used instead, see hwloc_opencl_get_device_osdev()
+ * and hwloc_opencl_get_device_osdev_by_index().
+ *
+ * This function is currently only implemented in a meaningful way for
+ * Linux with the AMD OpenCL implementation; other systems will simply
+ * get a full cpuset.
+ */
+static __hwloc_inline int
+hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
+ cl_device_id device __hwloc_attribute_unused,
+ hwloc_cpuset_t set)
+{
+#if (defined HWLOC_LINUX_SYS) && (defined CL_DEVICE_TOPOLOGY_AMD)
+ /* If we're on Linux + AMD OpenCL, use the AMD extension + the sysfs mechanism to get the local cpus */
+#define HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX 128
+ char path[HWLOC_OPENCL_DEVICE_SYSFS_PATH_MAX];
+ FILE *sysfile = NULL;
+ cl_device_topology_amd amdtopo;
+ cl_int clret;
+
+ if (!hwloc_topology_is_thissystem(topology)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ clret = clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL);
+ if (CL_SUCCESS != clret) {
+ hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+ return 0;
+ }
+ if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) {
+ hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+ return 0;
+ }
+
+ sprintf(path, "/sys/bus/pci/devices/0000:%02x:%02x.%01x/local_cpus", amdtopo.pcie.bus, amdtopo.pcie.device, amdtopo.pcie.function);
+ sysfile = fopen(path, "r");
+ if (!sysfile)
+ return -1;
+
+ hwloc_linux_parse_cpumap_file(sysfile, set);
+ if (hwloc_bitmap_iszero(set))
+ hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+
+ fclose(sysfile);
+#else
+ /* Non-Linux + AMD OpenCL systems simply get a full cpuset */
+ hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+#endif
+ return 0;
+}
+
+/** \brief Get the hwloc OS device object corresponding to the
+ * OpenCL device for the given indexes.
+ *
+ * Return the OS device object describing the OpenCL device
+ * whose platform index is \p platform_index,
+ * and whose device index within this platform is \p device_index.
+ * Return NULL if there is none.
+ *
+ * The topology \p topology does not necessarily have to match the current
+ * machine. For instance the topology may be an XML import of a remote host.
+ * I/O devices detection and the OpenCL component must be enabled in the topology.
+ *
+ * \note The corresponding PCI device object can be obtained by looking
+ * at the OS device parent object.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_opencl_get_device_osdev_by_index(hwloc_topology_t topology,
+ unsigned platform_index, unsigned device_index)
+{
+ unsigned x = (unsigned) -1, y = (unsigned) -1;
+ hwloc_obj_t osdev = NULL;
+ while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+ if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type
+ && osdev->name
+ && sscanf(osdev->name, "opencl%ud%u", &x, &y) == 2
+ && platform_index == x && device_index == y)
+ return osdev;
+ }
+ return NULL;
+}
+
+/** \brief Get the hwloc OS device object corresponding to OpenCL device \p device.
+ *
+ * Return the hwloc OS device object that describes the given
+ * OpenCL device \p device. Return NULL if there is none.
+ *
+ * Topology \p topology and device \p device must match the local machine.
+ * I/O devices detection and the OpenCL component must be enabled in the topology.
+ * If not, the locality of the object may still be found using
+ * hwloc_opencl_get_device_cpuset().
+ *
+ * \note The corresponding hwloc PCI device may be found by looking
+ * at the result parent pointer.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_opencl_get_device_osdev(hwloc_topology_t topology __hwloc_attribute_unused,
+ cl_device_id device __hwloc_attribute_unused)
+{
+#ifdef CL_DEVICE_TOPOLOGY_AMD
+ hwloc_obj_t osdev;
+ cl_device_topology_amd amdtopo;
+ cl_int clret;
+
+ clret = clGetDeviceInfo(device, CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL);
+ if (CL_SUCCESS != clret) {
+ errno = EINVAL;
+ return NULL;
+ }
+ if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ osdev = NULL;
+ while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+ hwloc_obj_t pcidev = osdev->parent;
+ if (strncmp(osdev->name, "opencl", 6))
+ continue;
+ if (pcidev
+ && pcidev->type == HWLOC_OBJ_PCI_DEVICE
+ && pcidev->attr->pcidev.domain == 0
+ && pcidev->attr->pcidev.bus == amdtopo.pcie.bus
+ && pcidev->attr->pcidev.dev == amdtopo.pcie.device
+ && pcidev->attr->pcidev.func == amdtopo.pcie.function)
+ return osdev;
+ }
+
+ return NULL;
+#else
+ return NULL;
+#endif
+}
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_OPENCL_H */
diff --git a/ext/hwloc/include/hwloc/openfabrics-verbs.h b/ext/hwloc/include/hwloc/openfabrics-verbs.h
new file mode 100644
index 000000000..69f86fe1b
--- /dev/null
+++ b/ext/hwloc/include/hwloc/openfabrics-verbs.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2013 Inria. All rights reserved.
+ * Copyright © 2009-2010 Université Bordeaux 1
+ * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief Macros to help interaction between hwloc and OpenFabrics
+ * verbs.
+ *
+ * Applications that use both hwloc and OpenFabrics verbs may want to
+ * include this file so as to get topology information for OpenFabrics
+ * hardware.
+ *
+ */
+
+#ifndef HWLOC_OPENFABRICS_VERBS_H
+#define HWLOC_OPENFABRICS_VERBS_H
+
+#include <hwloc.h>
+#include <hwloc/autogen/config.h>
+#ifdef HWLOC_LINUX_SYS
+#include <hwloc/linux.h>
+#endif
+
+#include <infiniband/verbs.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_openfabrics Interoperability with OpenFabrics
+ *
+ * This interface offers ways to retrieve topology information about
+ * OpenFabrics devices.
+ *
+ * @{
+ */
+
+/** \brief Get the CPU set of logical processors that are physically
+ * close to device \p ibdev.
+ *
+ * Return the CPU set describing the locality of the OpenFabrics
+ * device \p ibdev.
+ *
+ * Topology \p topology and device \p ibdev must match the local machine.
+ * I/O devices detection is not needed in the topology.
+ *
+ * The function only returns the locality of the device.
+ * If more information about the device is needed, OS objects should
+ * be used instead, see hwloc_ibv_get_device_osdev()
+ * and hwloc_ibv_get_device_osdev_by_name().
+ *
+ * This function is currently only implemented in a meaningful way for
+ * Linux; other systems will simply get a full cpuset.
+ */
+static __hwloc_inline int
+hwloc_ibv_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
+ struct ibv_device *ibdev, hwloc_cpuset_t set)
+{
+#ifdef HWLOC_LINUX_SYS
+ /* If we're on Linux, use the verbs-provided sysfs mechanism to
+ get the local cpus */
+#define HWLOC_OPENFABRICS_VERBS_SYSFS_PATH_MAX 128
+ char path[HWLOC_OPENFABRICS_VERBS_SYSFS_PATH_MAX];
+ FILE *sysfile = NULL;
+
+ if (!hwloc_topology_is_thissystem(topology)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ sprintf(path, "/sys/class/infiniband/%s/device/local_cpus",
+ ibv_get_device_name(ibdev));
+ sysfile = fopen(path, "r");
+ if (!sysfile)
+ return -1;
+
+ hwloc_linux_parse_cpumap_file(sysfile, set);
+ if (hwloc_bitmap_iszero(set))
+ hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+
+ fclose(sysfile);
+#else
+ /* Non-Linux systems simply get a full cpuset */
+ hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+#endif
+ return 0;
+}
+
+/** \brief Get the hwloc OS device object corresponding to the OpenFabrics
+ * device named \p ibname.
+ *
+ * Return the OS device object describing the OpenFabrics device whose
+ * name is \p ibname. Returns NULL if there is none.
+ * The name \p ibname is usually obtained from ibv_get_device_name().
+ *
+ * The topology \p topology does not necessarily have to match the current
+ * machine. For instance the topology may be an XML import of a remote host.
+ * I/O devices detection must be enabled in the topology.
+ *
+ * \note The corresponding PCI device object can be obtained by looking
+ * at the OS device parent object.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_ibv_get_device_osdev_by_name(hwloc_topology_t topology,
+ const char *ibname)
+{
+ hwloc_obj_t osdev = NULL;
+ while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
+ if (HWLOC_OBJ_OSDEV_OPENFABRICS == osdev->attr->osdev.type
+ && osdev->name && !strcmp(ibname, osdev->name))
+ return osdev;
+ }
+ return NULL;
+}
+
+/** \brief Get the hwloc OS device object corresponding to the OpenFabrics
+ * device \p ibdev.
+ *
+ * Return the OS device object describing the OpenFabrics device \p ibdev.
+ * Returns NULL if there is none.
+ *
+ * Topology \p topology and device \p ibdev must match the local machine.
+ * I/O devices detection must be enabled in the topology.
+ * If not, the locality of the object may still be found using
+ * hwloc_ibv_get_device_cpuset().
+ *
+ * \note The corresponding PCI device object can be obtained by looking
+ * at the OS device parent object.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_ibv_get_device_osdev(hwloc_topology_t topology,
+ struct ibv_device *ibdev)
+{
+ if (!hwloc_topology_is_thissystem(topology)) {
+ errno = EINVAL;
+ return NULL;
+ }
+ return hwloc_ibv_get_device_osdev_by_name(topology, ibv_get_device_name(ibdev));
+}
+
+/** @} */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_OPENFABRICS_VERBS_H */
diff --git a/ext/hwloc/include/hwloc/plugins.h b/ext/hwloc/include/hwloc/plugins.h
new file mode 100644
index 000000000..aa5d993c6
--- /dev/null
+++ b/ext/hwloc/include/hwloc/plugins.h
@@ -0,0 +1,385 @@
+/*
+ * Copyright © 2013 Inria. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#ifndef HWLOC_PLUGINS_H
+#define HWLOC_PLUGINS_H
+
+/** \file
+ * \brief Public interface for building hwloc plugins.
+ */
+
+struct hwloc_backend;
+
+#include <hwloc.h>
+#ifdef HWLOC_INSIDE_PLUGIN
+/* needed for hwloc_plugin_check_namespace() */
+#include <ltdl.h>
+#endif
+
+
+
+/** \defgroup hwlocality_disc_components Components and Plugins: Discovery components
+ * @{
+ */
+
+/** \brief Discovery component type */
+typedef enum hwloc_disc_component_type_e {
+ /** \brief CPU-only discovery through the OS, or generic no-OS support.
+ * \hideinitializer */
+ HWLOC_DISC_COMPONENT_TYPE_CPU = (1<<0),
+
+ /** \brief xml, synthetic or custom,
+ * platform-specific components such as bgq.
+ * Anything that discovers CPU and everything else.
+ * No misc backend is expected to complement a global component.
+ * \hideinitializer */
+ HWLOC_DISC_COMPONENT_TYPE_GLOBAL = (1<<1),
+
+ /** \brief OpenCL, Cuda, etc.
+ * \hideinitializer */
+ HWLOC_DISC_COMPONENT_TYPE_MISC = (1<<2)
+} hwloc_disc_component_type_t;
+
+/** \brief Discovery component structure
+ *
+ * This is the major kind of components, taking care of the discovery.
+ * They are registered by generic components, either statically-built or as plugins.
+ */
+struct hwloc_disc_component {
+ /** \brief Discovery component type */
+ hwloc_disc_component_type_t type;
+
+ /** \brief Name.
+ * If this component is built as a plugin, this name does not have to match the plugin filename.
+ */
+ const char *name;
+
+ /** \brief Component types to exclude, as an OR'ed set of HWLOC_DISC_COMPONENT_TYPE_*.
+ *
+ * For a GLOBAL component, this usually includes all other types (~0).
+ *
+ * Other components only exclude types that may bring conflicting
+ * topology information. MISC components should likely not be excluded
+ * since they usually bring non-primary additional information.
+ */
+ unsigned excludes;
+
+ /** \brief Instantiate callback to create a backend from the component.
+ * Parameters data1, data2, data3 are NULL except for components
+ * that have special enabling routines such as hwloc_topology_set_xml(). */
+ struct hwloc_backend * (*instantiate)(struct hwloc_disc_component *component, const void *data1, const void *data2, const void *data3);
+
+ /** \brief Component priority.
+ * Used to sort topology->components, higher priority first.
+ * Also used to decide between two components with the same name.
+ *
+ * Usual values are
+ * 50 for native OS (or platform) components,
+ * 45 for x86,
+ * 40 for no-OS fallback,
+ * 30 for global components (xml/synthetic/custom),
+ * 20 for pci,
+ * 10 for other misc components (opencl etc.).
+ */
+ unsigned priority;
+
+ /** \private Used internally to list components by priority on topology->components
+ * (the component structure is usually read-only,
+ * the core copies it before using this field for queueing)
+ */
+ struct hwloc_disc_component * next;
+};
+
+/** @} */
+
+
+
+
+/** \defgroup hwlocality_disc_backends Components and Plugins: Discovery backends
+ * @{
+ */
+
+/** \brief Discovery backend structure
+ *
+ * A backend is the instantiation of a discovery component.
+ * When a component gets enabled for a topology,
+ * its instantiate() callback creates a backend.
+ *
+ * hwloc_backend_alloc() initializes all fields to default values
+ * that the component may change (except "component" and "next")
+ * before enabling the backend with hwloc_backend_enable().
+ */
+struct hwloc_backend {
+ /** \private Reserved for the core, set by hwloc_backend_alloc() */
+ struct hwloc_disc_component * component;
+ /** \private Reserved for the core, set by hwloc_backend_enable() */
+ struct hwloc_topology * topology;
+ /** \private Reserved for the core. Set to 1 if forced through envvar, 0 otherwise. */
+ int envvar_forced;
+ /** \private Reserved for the core. Used internally to list backends topology->backends. */
+ struct hwloc_backend * next;
+
+ /** \brief Backend flags, as an OR'ed set of HWLOC_BACKEND_FLAG_* */
+ unsigned long flags;
+
+ /** \brief Backend-specific 'is_custom' property.
+ * Shortcut on !strcmp(..->component->name, "custom").
+ * Only the custom component should touch this. */
+ int is_custom;
+
+ /** \brief Backend-specific 'is_thissystem' property.
+ * Set to 0 or 1 if the backend should enforce the thissystem flag when it gets enabled.
+ * Set to -1 if the backend doesn't care (default). */
+ int is_thissystem;
+
+ /** \brief Backend private data, or NULL if none. */
+ void * private_data;
+ /** \brief Callback for freeing the private_data.
+ * May be NULL.
+ */
+ void (*disable)(struct hwloc_backend *backend);
+
+ /** \brief Main discovery callback.
+ * returns > 0 if it modified the topology tree, -1 on error, 0 otherwise.
+ * May be NULL if type is HWLOC_DISC_COMPONENT_TYPE_MISC. */
+ int (*discover)(struct hwloc_backend *backend);
+
+ /** \brief Callback used by the PCI backend to retrieve the locality of a PCI object from the OS/cpu backend.
+ * May be NULL. */
+ int (*get_obj_cpuset)(struct hwloc_backend *backend, struct hwloc_backend *caller, struct hwloc_obj *obj, hwloc_bitmap_t cpuset);
+
+ /** \brief Callback called by backends to notify this backend that a new object was added.
+ * returns > 0 if it modified the topology tree, 0 otherwise.
+ * May be NULL. */
+ int (*notify_new_object)(struct hwloc_backend *backend, struct hwloc_backend *caller, struct hwloc_obj *obj);
+};
+
+/** \brief Backend flags */
+enum hwloc_backend_flag_e {
+ /** \brief Levels should be reconnected before this backend discover() is used.
+ * \hideinitializer */
+ HWLOC_BACKEND_FLAG_NEED_LEVELS = (1UL<<0)
+};
+
+/** \brief Allocate a backend structure, set good default values, initialize backend->component and topology, etc.
+ * The caller will then modify whatever needed, and call hwloc_backend_enable().
+ */
+HWLOC_DECLSPEC struct hwloc_backend * hwloc_backend_alloc(struct hwloc_disc_component *component);
+
+/** \brief Enable a previously allocated and setup backend. */
+HWLOC_DECLSPEC int hwloc_backend_enable(struct hwloc_topology *topology, struct hwloc_backend *backend);
+
+/** \brief Used by backends discovery callbacks to request locality information from others.
+ *
+ * Traverse the list of enabled backends until one has a
+ * get_obj_cpuset() method, and call it.
+ */
+HWLOC_DECLSPEC int hwloc_backends_get_obj_cpuset(struct hwloc_backend *caller, struct hwloc_obj *obj, hwloc_bitmap_t cpuset);
+
+/** \brief Used by backends discovery callbacks to notify other
+ * backends of new objects.
+ *
+ * Traverse the list of enabled backends (all but caller) and invoke
+ * their notify_new_object() method to notify them that a new object
+ * just got added to the topology.
+ *
+ * Currently only used for notifying of new PCI device objects.
+ */
+HWLOC_DECLSPEC int hwloc_backends_notify_new_object(struct hwloc_backend *caller, struct hwloc_obj *obj);
+
+/** @} */
+
+
+
+
+/** \defgroup hwlocality_generic_components Components and Plugins: Generic components
+ * @{
+ */
+
+/** \brief Generic component type */
+typedef enum hwloc_component_type_e {
+ /** \brief The data field must point to a struct hwloc_disc_component. */
+ HWLOC_COMPONENT_TYPE_DISC,
+
+ /** \brief The data field must point to a struct hwloc_xml_component. */
+ HWLOC_COMPONENT_TYPE_XML
+} hwloc_component_type_t;
+
+/** \brief Generic component structure
+ *
+ * Generic components structure, either statically listed by configure in static-components.h
+ * or dynamically loaded as a plugin.
+ */
+struct hwloc_component {
+ /** \brief Component ABI version, set to HWLOC_COMPONENT_ABI */
+ unsigned abi;
+
+ /** \brief Component type */
+ hwloc_component_type_t type;
+
+ /** \brief Component flags, unused for now */
+ unsigned long flags;
+
+ /** \brief Component data, pointing to a struct hwloc_disc_component or struct hwloc_xml_component. */
+ void * data;
+};
+
+/** @} */
+
+
+
+
+/** \defgroup hwlocality_components_core_funcs Components and Plugins: Core functions to be used by components
+ * @{
+ */
+
+/** \brief Add an object to the topology.
+ *
+ * It is sorted along the tree of other objects according to the inclusion of
+ * cpusets, to eventually be added as a child of the smallest object including
+ * this object.
+ *
+ * If the cpuset is empty, the type of the object (and maybe some attributes)
+ * must be enough to find where to insert the object. This is especially true
+ * for NUMA nodes with memory and no CPUs.
+ *
+ * The given object should not have children.
+ *
+ * This shall only be called before levels are built.
+ *
+ * In case of error, hwloc_report_os_error() is called.
+ *
+ * Returns the object on success.
+ * Returns NULL and frees obj on error.
+ * Returns another object and frees obj if it was merged with an identical pre-existing object.
+ */
+HWLOC_DECLSPEC struct hwloc_obj *hwloc_insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj);
+
+/** \brief Type of error callbacks during object insertion */
+typedef void (*hwloc_report_error_t)(const char * msg, int line);
+/** \brief Report an insertion error from a backend */
+HWLOC_DECLSPEC void hwloc_report_os_error(const char * msg, int line);
+/** \brief Check whether insertion errors are hidden */
+HWLOC_DECLSPEC int hwloc_hide_errors(void);
+
+/** \brief Add an object to the topology and specify which error callback to use.
+ *
+ * Aside from the error callback selection, this function is identical to hwloc_insert_object_by_cpuset()
+ */
+HWLOC_DECLSPEC struct hwloc_obj *hwloc__insert_object_by_cpuset(struct hwloc_topology *topology, hwloc_obj_t obj, hwloc_report_error_t report_error);
+
+/** \brief Insert an object somewhere in the topology.
+ *
+ * It is added as the last child of the given parent.
+ * The cpuset is completely ignored, so strange objects such as I/O devices should
+ * preferably be inserted with this.
+ *
+ * The given object may have children.
+ *
+ * Remember to call topology_connect() afterwards to fix handy pointers.
+ */
+HWLOC_DECLSPEC void hwloc_insert_object_by_parent(struct hwloc_topology *topology, hwloc_obj_t parent, hwloc_obj_t obj);
+
+/** \brief Allocate and initialize an object of the given type and physical index */
+static __hwloc_inline struct hwloc_obj *
+hwloc_alloc_setup_object(hwloc_obj_type_t type, signed os_index)
+{
+ struct hwloc_obj *obj = malloc(sizeof(*obj));
+ memset(obj, 0, sizeof(*obj));
+ obj->type = type;
+ obj->os_index = os_index;
+ obj->os_level = -1;
+ obj->attr = malloc(sizeof(*obj->attr));
+ memset(obj->attr, 0, sizeof(*obj->attr));
+ /* do not allocate the cpuset here, let the caller do it */
+ return obj;
+}
+
+/** \brief Setup object cpusets/nodesets by OR'ing its children.
+ *
+ * Used when adding an object late in the topology, after propagating sets up and down.
+ * The caller should use this after inserting by cpuset (which means the cpusets is already OK).
+ * Typical case: PCI backend adding a hostbridge parent.
+ */
+HWLOC_DECLSPEC int hwloc_fill_object_sets(hwloc_obj_t obj);
+
+/** \brief Insert a list of PCI devices and bridges in the backend topology.
+ *
+ * Insert a list of objects (either PCI device or bridges) starting at first_obj
+ * (linked by next_sibling in the topology, and ending with NULL).
+ * Objects are placed under the right bridges, and the remaining upstream bridges
+ * are then inserted in the topology by calling the get_obj_cpuset() callback to
+ * find their locality.
+ */
+HWLOC_DECLSPEC int hwloc_insert_pci_device_list(struct hwloc_backend *backend, struct hwloc_obj *first_obj);
+
+/** \brief Return the offset of the given capability in the PCI config space buffer
+ *
+ * This function requires a 256-bytes config space. Unknown/unavailable bytes should be set to 0xff.
+ */
+HWLOC_DECLSPEC unsigned hwloc_pci_find_cap(const unsigned char *config, unsigned cap);
+
+/** \brief Fill linkspeed by reading the PCI config space where PCI_CAP_ID_EXP is at position offset.
+ *
+ * Needs 20 bytes of EXP capability block starting at offset in the config space
+ * for registers up to link status.
+ */
+HWLOC_DECLSPEC int hwloc_pci_find_linkspeed(const unsigned char *config, unsigned offset, float *linkspeed);
+
+/** \brief Modify the PCI device object into a bridge and fill its attribute if a bridge is found in the PCI config space.
+ *
+ * This function requires 64 bytes of common configuration header at the beginning of config.
+ */
+HWLOC_DECLSPEC int hwloc_pci_prepare_bridge(hwloc_obj_t obj, const unsigned char *config);
+
+/** \brief Make sure that plugins can lookup core symbols.
+ *
+ * This is a sanity check to avoid lazy-lookup failures when libhwloc
+ * is loaded within a plugin, and later tries to load its own plugins.
+ * This may fail (and abort the program) if libhwloc symbols are in a
+ * private namespace.
+ *
+ * Plugins should call this function as an early sanity check to avoid
+ * later crashes if lazy symbol resolution is used by the upper layer that
+ * loaded hwloc (e.g. OpenCL implementations using dlopen with RTLD_LAZY).
+ *
+ * \note The build system must define HWLOC_INSIDE_PLUGIN if and only if
+ * building the caller as a plugin.
+ */
+static __hwloc_inline int
+hwloc_plugin_check_namespace(const char *pluginname __hwloc_attribute_unused, const char *symbol __hwloc_attribute_unused)
+{
+#ifdef HWLOC_INSIDE_PLUGIN
+ lt_dlhandle handle;
+ void *sym;
+ handle = lt_dlopen(NULL);
+ if (!handle)
+ /* cannot check, assume things will work */
+ return 0;
+ sym = lt_dlsym(handle, symbol);
+ lt_dlclose(handle);
+ if (!sym) {
+ static int verboseenv_checked = 0;
+ static int verboseenv_value = 0;
+ if (!verboseenv_checked) {
+ char *verboseenv = getenv("HWLOC_PLUGINS_VERBOSE");
+ verboseenv_value = atoi(verboseenv);
+ verboseenv_checked = 1;
+ }
+ if (verboseenv_value)
+ fprintf(stderr, "Plugin `%s' disabling itself because it cannot find the `%s' core symbol.\n",
+ pluginname, symbol);
+ return -1;
+ }
+#endif /* HWLOC_INSIDE_PLUGIN */
+ return 0;
+}
+
+/** @} */
+
+
+
+
+#endif /* HWLOC_PLUGINS_H */
diff --git a/ext/hwloc/include/hwloc/rename.h b/ext/hwloc/include/hwloc/rename.h
new file mode 100644
index 000000000..ab0bf389c
--- /dev/null
+++ b/ext/hwloc/include/hwloc/rename.h
@@ -0,0 +1,625 @@
+/*
+ * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
+ * Copyright © 2010-2013 Inria. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#ifndef HWLOC_RENAME_H
+#define HWLOC_RENAME_H
+
+#include <hwloc/autogen/config.h> /* provides HWLOC_SYM_TRANSFORM and HWLOC_SYM_PREFIX(_CAPS) used below */
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Only enact these defines if we're actually renaming the symbols
+ (i.e., avoid trying to have no-op defines if we're *not*
+ renaming). */
+
+#if HWLOC_SYM_TRANSFORM
+
+/* Use a preprocessor two-step in order to get the prefixing right.
+ Make 2 macros: HWLOC_NAME and HWLOC_NAME_CAPS for renaming
+ things. */
+
+#define HWLOC_MUNGE_NAME(a, b) HWLOC_MUNGE_NAME2(a, b)
+#define HWLOC_MUNGE_NAME2(a, b) a ## b
+#define HWLOC_NAME(name) HWLOC_MUNGE_NAME(HWLOC_SYM_PREFIX, hwloc_ ## name)
+#define HWLOC_NAME_CAPS(name) HWLOC_MUNGE_NAME(HWLOC_SYM_PREFIX_CAPS, hwloc_ ## name)
+
+/* Now define all the "real" names to be the prefixed names. This
+ allows us to use the real names throughout the code base (i.e.,
+ "hwloc_"); the preprocessor will adjust to have the prefixed
+ name under the covers. */
+
+/* Names from hwloc.h */
+
+#define hwloc_get_api_version HWLOC_NAME(get_api_version)
+
+#define hwloc_topology HWLOC_NAME(topology)
+#define hwloc_topology_t HWLOC_NAME(topology_t)
+
+#define hwloc_cpuset_t HWLOC_NAME(cpuset_t)
+#define hwloc_const_cpuset_t HWLOC_NAME(const_cpuset_t)
+#define hwloc_nodeset_t HWLOC_NAME(nodeset_t)
+#define hwloc_const_nodeset_t HWLOC_NAME(const_nodeset_t)
+
+#define HWLOC_OBJ_SYSTEM HWLOC_NAME_CAPS(OBJ_SYSTEM)
+#define HWLOC_OBJ_MACHINE HWLOC_NAME_CAPS(OBJ_MACHINE)
+#define HWLOC_OBJ_NODE HWLOC_NAME_CAPS(OBJ_NODE)
+#define HWLOC_OBJ_SOCKET HWLOC_NAME_CAPS(OBJ_SOCKET)
+#define HWLOC_OBJ_CACHE HWLOC_NAME_CAPS(OBJ_CACHE)
+#define HWLOC_OBJ_CORE HWLOC_NAME_CAPS(OBJ_CORE)
+#define HWLOC_OBJ_PU HWLOC_NAME_CAPS(OBJ_PU)
+#define HWLOC_OBJ_MISC HWLOC_NAME_CAPS(OBJ_MISC)
+#define HWLOC_OBJ_GROUP HWLOC_NAME_CAPS(OBJ_GROUP)
+#define HWLOC_OBJ_BRIDGE HWLOC_NAME_CAPS(OBJ_BRIDGE)
+#define HWLOC_OBJ_PCI_DEVICE HWLOC_NAME_CAPS(OBJ_PCI_DEVICE)
+#define HWLOC_OBJ_OS_DEVICE HWLOC_NAME_CAPS(OBJ_OS_DEVICE)
+#define HWLOC_OBJ_TYPE_MAX HWLOC_NAME_CAPS(OBJ_TYPE_MAX)
+#define hwloc_obj_type_t HWLOC_NAME(obj_type_t)
+
+#define hwloc_obj_cache_type_e HWLOC_NAME(obj_cache_type_e)
+#define hwloc_obj_cache_type_t HWLOC_NAME(obj_cache_type_t)
+#define HWLOC_OBJ_CACHE_UNIFIED HWLOC_NAME_CAPS(OBJ_CACHE_UNIFIED)
+#define HWLOC_OBJ_CACHE_DATA HWLOC_NAME_CAPS(OBJ_CACHE_DATA)
+#define HWLOC_OBJ_CACHE_INSTRUCTION HWLOC_NAME_CAPS(OBJ_CACHE_INSTRUCTION)
+
+#define hwloc_obj_bridge_type_e HWLOC_NAME(obj_bridge_type_e)
+#define hwloc_obj_bridge_type_t HWLOC_NAME(obj_bridge_type_t)
+#define HWLOC_OBJ_BRIDGE_HOST HWLOC_NAME_CAPS(OBJ_BRIDGE_HOST)
+#define HWLOC_OBJ_BRIDGE_PCI HWLOC_NAME_CAPS(OBJ_BRIDGE_PCI)
+
+#define hwloc_obj_osdev_type_e HWLOC_NAME(obj_osdev_type_e)
+#define hwloc_obj_osdev_type_t HWLOC_NAME(obj_osdev_type_t)
+#define HWLOC_OBJ_OSDEV_BLOCK HWLOC_NAME_CAPS(OBJ_OSDEV_BLOCK)
+#define HWLOC_OBJ_OSDEV_GPU HWLOC_NAME_CAPS(OBJ_OSDEV_GPU)
+#define HWLOC_OBJ_OSDEV_NETWORK HWLOC_NAME_CAPS(OBJ_OSDEV_NETWORK)
+#define HWLOC_OBJ_OSDEV_OPENFABRICS HWLOC_NAME_CAPS(OBJ_OSDEV_OPENFABRICS)
+#define HWLOC_OBJ_OSDEV_DMA HWLOC_NAME_CAPS(OBJ_OSDEV_DMA)
+#define HWLOC_OBJ_OSDEV_COPROC HWLOC_NAME_CAPS(OBJ_OSDEV_COPROC)
+
+#define hwloc_compare_types HWLOC_NAME(compare_types)
+
+#define hwloc_compare_types_e HWLOC_NAME(compare_types_e)
+#define HWLOC_TYPE_UNORDERED HWLOC_NAME_CAPS(TYPE_UNORDERED)
+
+#define hwloc_obj_memory_s HWLOC_NAME(obj_memory_s)
+#define hwloc_obj_memory_page_type_s HWLOC_NAME(obj_memory_page_type_s)
+
+#define hwloc_obj HWLOC_NAME(obj)
+#define hwloc_obj_t HWLOC_NAME(obj_t)
+
+#define hwloc_distances_s HWLOC_NAME(distances_s)
+#define hwloc_obj_info_s HWLOC_NAME(obj_info_s)
+
+#define hwloc_obj_attr_u HWLOC_NAME(obj_attr_u)
+#define hwloc_cache_attr_s HWLOC_NAME(cache_attr_s)
+#define hwloc_group_attr_s HWLOC_NAME(group_attr_s)
+#define hwloc_pcidev_attr_s HWLOC_NAME(pcidev_attr_s)
+#define hwloc_bridge_attr_s HWLOC_NAME(bridge_attr_s)
+#define hwloc_osdev_attr_s HWLOC_NAME(osdev_attr_s)
+
+#define hwloc_topology_init HWLOC_NAME(topology_init)
+#define hwloc_topology_load HWLOC_NAME(topology_load)
+#define hwloc_topology_destroy HWLOC_NAME(topology_destroy)
+#define hwloc_topology_check HWLOC_NAME(topology_check)
+#define hwloc_topology_ignore_type HWLOC_NAME(topology_ignore_type)
+#define hwloc_topology_ignore_type_keep_structure HWLOC_NAME(topology_ignore_type_keep_structure)
+#define hwloc_topology_ignore_all_keep_structure HWLOC_NAME(topology_ignore_all_keep_structure)
+
+#define hwloc_topology_flags_e HWLOC_NAME(topology_flags_e)
+
+#define HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WHOLE_SYSTEM)
+#define HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IS_THISSYSTEM)
+#define HWLOC_TOPOLOGY_FLAG_IO_DEVICES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IO_DEVICES)
+#define HWLOC_TOPOLOGY_FLAG_IO_BRIDGES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IO_BRIDGES)
+#define HWLOC_TOPOLOGY_FLAG_WHOLE_IO HWLOC_NAME_CAPS(TOPOLOGY_FLAG_WHOLE_IO)
+#define HWLOC_TOPOLOGY_FLAG_ICACHES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_ICACHES)
+
+#define hwloc_topology_set_flags HWLOC_NAME(topology_set_flags)
+#define hwloc_topology_set_fsroot HWLOC_NAME(topology_set_fsroot)
+#define hwloc_topology_set_pid HWLOC_NAME(topology_set_pid)
+#define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic)
+#define hwloc_topology_set_xml HWLOC_NAME(topology_set_xml)
+#define hwloc_topology_set_xmlbuffer HWLOC_NAME(topology_set_xmlbuffer)
+#define hwloc_topology_set_custom HWLOC_NAME(topology_set_custom)
+#define hwloc_topology_set_distance_matrix HWLOC_NAME(topology_set_distance_matrix)
+
+#define hwloc_topology_discovery_support HWLOC_NAME(topology_discovery_support)
+#define hwloc_topology_cpubind_support HWLOC_NAME(topology_cpubind_support)
+#define hwloc_topology_membind_support HWLOC_NAME(topology_membind_support)
+#define hwloc_topology_support HWLOC_NAME(topology_support)
+#define hwloc_topology_get_support HWLOC_NAME(topology_get_support)
+#define hwloc_topology_export_xml HWLOC_NAME(topology_export_xml)
+#define hwloc_topology_export_xmlbuffer HWLOC_NAME(topology_export_xmlbuffer)
+#define hwloc_free_xmlbuffer HWLOC_NAME(free_xmlbuffer)
+#define hwloc_topology_set_userdata_export_callback HWLOC_NAME(topology_set_userdata_export_callback)
+#define hwloc_export_obj_userdata HWLOC_NAME(export_obj_userdata)
+#define hwloc_export_obj_userdata_base64 HWLOC_NAME(export_obj_userdata_base64)
+#define hwloc_topology_set_userdata_import_callback HWLOC_NAME(topology_set_userdata_import_callback)
+
+#define hwloc_topology_insert_misc_object_by_cpuset HWLOC_NAME(topology_insert_misc_object_by_cpuset)
+#define hwloc_topology_insert_misc_object_by_parent HWLOC_NAME(topology_insert_misc_object_by_parent)
+
+#define hwloc_custom_insert_topology HWLOC_NAME(custom_insert_topology)
+#define hwloc_custom_insert_group_object_by_parent HWLOC_NAME(custom_insert_group_object_by_parent)
+
+#define hwloc_restrict_flags_e HWLOC_NAME(restrict_flags_e)
+#define HWLOC_RESTRICT_FLAG_ADAPT_DISTANCES HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_DISTANCES)
+#define HWLOC_RESTRICT_FLAG_ADAPT_MISC HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_MISC)
+#define HWLOC_RESTRICT_FLAG_ADAPT_IO HWLOC_NAME_CAPS(RESTRICT_FLAG_ADAPT_IO)
+#define hwloc_topology_restrict HWLOC_NAME(topology_restrict)
+#define hwloc_topology_dup HWLOC_NAME(topology_dup)
+
+#define hwloc_topology_get_depth HWLOC_NAME(topology_get_depth)
+#define hwloc_get_type_depth HWLOC_NAME(get_type_depth)
+
+#define hwloc_get_type_depth_e HWLOC_NAME(get_type_depth_e)
+#define HWLOC_TYPE_DEPTH_UNKNOWN HWLOC_NAME_CAPS(TYPE_DEPTH_UNKNOWN)
+#define HWLOC_TYPE_DEPTH_MULTIPLE HWLOC_NAME_CAPS(TYPE_DEPTH_MULTIPLE)
+#define HWLOC_TYPE_DEPTH_BRIDGE HWLOC_NAME_CAPS(TYPE_DEPTH_BRIDGE)
+#define HWLOC_TYPE_DEPTH_PCI_DEVICE HWLOC_NAME_CAPS(TYPE_DEPTH_PCI_DEVICE)
+#define HWLOC_TYPE_DEPTH_OS_DEVICE HWLOC_NAME_CAPS(TYPE_DEPTH_OS_DEVICE)
+
+#define hwloc_get_depth_type HWLOC_NAME(get_depth_type)
+#define hwloc_get_nbobjs_by_depth HWLOC_NAME(get_nbobjs_by_depth)
+#define hwloc_get_nbobjs_by_type HWLOC_NAME(get_nbobjs_by_type)
+
+#define hwloc_topology_is_thissystem HWLOC_NAME(topology_is_thissystem)
+#define hwloc_topology_get_flags HWLOC_NAME(topology_get_flags)
+
+#define hwloc_get_obj_by_depth HWLOC_NAME(get_obj_by_depth )
+#define hwloc_get_obj_by_type HWLOC_NAME(get_obj_by_type )
+
+#define hwloc_obj_type_string HWLOC_NAME(obj_type_string )
+#define hwloc_obj_type_of_string HWLOC_NAME(obj_type_of_string )
+#define hwloc_obj_type_snprintf HWLOC_NAME(obj_type_snprintf )
+#define hwloc_obj_attr_snprintf HWLOC_NAME(obj_attr_snprintf )
+#define hwloc_obj_cpuset_snprintf HWLOC_NAME(obj_cpuset_snprintf)
+#define hwloc_obj_get_info_by_name HWLOC_NAME(obj_get_info_by_name)
+#define hwloc_obj_add_info HWLOC_NAME(obj_add_info)
+
+#define HWLOC_CPUBIND_PROCESS HWLOC_NAME_CAPS(CPUBIND_PROCESS)
+#define HWLOC_CPUBIND_THREAD HWLOC_NAME_CAPS(CPUBIND_THREAD)
+#define HWLOC_CPUBIND_STRICT HWLOC_NAME_CAPS(CPUBIND_STRICT)
+#define HWLOC_CPUBIND_NOMEMBIND HWLOC_NAME_CAPS(CPUBIND_NOMEMBIND)
+
+#define hwloc_cpubind_flags_t HWLOC_NAME(cpubind_flags_t)
+
+#define hwloc_set_cpubind HWLOC_NAME(set_cpubind)
+#define hwloc_get_cpubind HWLOC_NAME(get_cpubind)
+#define hwloc_set_proc_cpubind HWLOC_NAME(set_proc_cpubind)
+#define hwloc_get_proc_cpubind HWLOC_NAME(get_proc_cpubind)
+#define hwloc_set_thread_cpubind HWLOC_NAME(set_thread_cpubind)
+#define hwloc_get_thread_cpubind HWLOC_NAME(get_thread_cpubind)
+
+#define hwloc_get_last_cpu_location HWLOC_NAME(get_last_cpu_location)
+#define hwloc_get_proc_last_cpu_location HWLOC_NAME(get_proc_last_cpu_location)
+
+#define HWLOC_MEMBIND_DEFAULT HWLOC_NAME_CAPS(MEMBIND_DEFAULT)
+#define HWLOC_MEMBIND_FIRSTTOUCH HWLOC_NAME_CAPS(MEMBIND_FIRSTTOUCH)
+#define HWLOC_MEMBIND_BIND HWLOC_NAME_CAPS(MEMBIND_BIND)
+#define HWLOC_MEMBIND_INTERLEAVE HWLOC_NAME_CAPS(MEMBIND_INTERLEAVE)
+#define HWLOC_MEMBIND_REPLICATE HWLOC_NAME_CAPS(MEMBIND_REPLICATE)
+#define HWLOC_MEMBIND_NEXTTOUCH HWLOC_NAME_CAPS(MEMBIND_NEXTTOUCH)
+#define HWLOC_MEMBIND_MIXED HWLOC_NAME_CAPS(MEMBIND_MIXED)
+
+#define hwloc_membind_policy_t HWLOC_NAME(membind_policy_t)
+
+#define HWLOC_MEMBIND_PROCESS HWLOC_NAME_CAPS(MEMBIND_PROCESS)
+#define HWLOC_MEMBIND_THREAD HWLOC_NAME_CAPS(MEMBIND_THREAD)
+#define HWLOC_MEMBIND_STRICT HWLOC_NAME_CAPS(MEMBIND_STRICT)
+#define HWLOC_MEMBIND_MIGRATE HWLOC_NAME_CAPS(MEMBIND_MIGRATE)
+#define HWLOC_MEMBIND_NOCPUBIND HWLOC_NAME_CAPS(MEMBIND_NOCPUBIND)
+
+#define hwloc_membind_flags_t HWLOC_NAME(membind_flags_t)
+
+#define hwloc_set_membind_nodeset HWLOC_NAME(set_membind_nodeset)
+#define hwloc_set_membind HWLOC_NAME(set_membind)
+#define hwloc_get_membind_nodeset HWLOC_NAME(get_membind_nodeset)
+#define hwloc_get_membind HWLOC_NAME(get_membind)
+#define hwloc_set_proc_membind_nodeset HWLOC_NAME(set_proc_membind_nodeset)
+#define hwloc_set_proc_membind HWLOC_NAME(set_proc_membind)
+#define hwloc_get_proc_membind_nodeset HWLOC_NAME(get_proc_membind_nodeset)
+#define hwloc_get_proc_membind HWLOC_NAME(get_proc_membind)
+#define hwloc_set_area_membind_nodeset HWLOC_NAME(set_area_membind_nodeset)
+#define hwloc_set_area_membind HWLOC_NAME(set_area_membind)
+#define hwloc_get_area_membind_nodeset HWLOC_NAME(get_area_membind_nodeset)
+#define hwloc_get_area_membind HWLOC_NAME(get_area_membind)
+#define hwloc_alloc_membind_nodeset HWLOC_NAME(alloc_membind_nodeset)
+#define hwloc_alloc_membind HWLOC_NAME(alloc_membind)
+#define hwloc_alloc HWLOC_NAME(alloc)
+#define hwloc_free HWLOC_NAME(free)
+
+#define hwloc_get_non_io_ancestor_obj HWLOC_NAME(get_non_io_ancestor_obj)
+#define hwloc_get_next_pcidev HWLOC_NAME(get_next_pcidev)
+#define hwloc_get_pcidev_by_busid HWLOC_NAME(get_pcidev_by_busid)
+#define hwloc_get_pcidev_by_busidstring HWLOC_NAME(get_pcidev_by_busidstring)
+#define hwloc_get_next_osdev HWLOC_NAME(get_next_osdev)
+#define hwloc_get_next_bridge HWLOC_NAME(get_next_bridge)
+#define hwloc_bridge_covers_pcibus HWLOC_NAME(bridge_covers_pcibus)
+#define hwloc_get_hostbridge_by_pcibus HWLOC_NAME(get_hostbridge_by_pcibus)
+
+/* hwloc/bitmap.h */
+
+#define hwloc_bitmap_s HWLOC_NAME(bitmap_s)
+#define hwloc_bitmap_t HWLOC_NAME(bitmap_t)
+#define hwloc_const_bitmap_t HWLOC_NAME(const_bitmap_t)
+
+#define hwloc_bitmap_alloc HWLOC_NAME(bitmap_alloc)
+#define hwloc_bitmap_alloc_full HWLOC_NAME(bitmap_alloc_full)
+#define hwloc_bitmap_free HWLOC_NAME(bitmap_free)
+#define hwloc_bitmap_dup HWLOC_NAME(bitmap_dup)
+#define hwloc_bitmap_copy HWLOC_NAME(bitmap_copy)
+#define hwloc_bitmap_snprintf HWLOC_NAME(bitmap_snprintf)
+#define hwloc_bitmap_asprintf HWLOC_NAME(bitmap_asprintf)
+#define hwloc_bitmap_sscanf HWLOC_NAME(bitmap_sscanf)
+#define hwloc_bitmap_list_snprintf HWLOC_NAME(bitmap_list_snprintf)
+#define hwloc_bitmap_list_asprintf HWLOC_NAME(bitmap_list_asprintf)
+#define hwloc_bitmap_list_sscanf HWLOC_NAME(bitmap_list_sscanf)
+#define hwloc_bitmap_taskset_snprintf HWLOC_NAME(bitmap_taskset_snprintf)
+#define hwloc_bitmap_taskset_asprintf HWLOC_NAME(bitmap_taskset_asprintf)
+#define hwloc_bitmap_taskset_sscanf HWLOC_NAME(bitmap_taskset_sscanf)
+#define hwloc_bitmap_zero HWLOC_NAME(bitmap_zero)
+#define hwloc_bitmap_fill HWLOC_NAME(bitmap_fill)
+#define hwloc_bitmap_from_ulong HWLOC_NAME(bitmap_from_ulong)
+
+#define hwloc_bitmap_from_ith_ulong HWLOC_NAME(bitmap_from_ith_ulong)
+#define hwloc_bitmap_to_ulong HWLOC_NAME(bitmap_to_ulong)
+#define hwloc_bitmap_to_ith_ulong HWLOC_NAME(bitmap_to_ith_ulong)
+#define hwloc_bitmap_only HWLOC_NAME(bitmap_only)
+#define hwloc_bitmap_allbut HWLOC_NAME(bitmap_allbut)
+#define hwloc_bitmap_set HWLOC_NAME(bitmap_set)
+#define hwloc_bitmap_set_range HWLOC_NAME(bitmap_set_range)
+#define hwloc_bitmap_set_ith_ulong HWLOC_NAME(bitmap_set_ith_ulong)
+#define hwloc_bitmap_clr HWLOC_NAME(bitmap_clr)
+#define hwloc_bitmap_clr_range HWLOC_NAME(bitmap_clr_range)
+#define hwloc_bitmap_isset HWLOC_NAME(bitmap_isset)
+#define hwloc_bitmap_iszero HWLOC_NAME(bitmap_iszero)
+#define hwloc_bitmap_isfull HWLOC_NAME(bitmap_isfull)
+#define hwloc_bitmap_isequal HWLOC_NAME(bitmap_isequal)
+#define hwloc_bitmap_intersects HWLOC_NAME(bitmap_intersects)
+#define hwloc_bitmap_isincluded HWLOC_NAME(bitmap_isincluded)
+#define hwloc_bitmap_or HWLOC_NAME(bitmap_or)
+#define hwloc_bitmap_and HWLOC_NAME(bitmap_and)
+#define hwloc_bitmap_andnot HWLOC_NAME(bitmap_andnot)
+#define hwloc_bitmap_xor HWLOC_NAME(bitmap_xor)
+#define hwloc_bitmap_not HWLOC_NAME(bitmap_not)
+#define hwloc_bitmap_first HWLOC_NAME(bitmap_first)
+#define hwloc_bitmap_last HWLOC_NAME(bitmap_last)
+#define hwloc_bitmap_next HWLOC_NAME(bitmap_next)
+#define hwloc_bitmap_singlify HWLOC_NAME(bitmap_singlify)
+#define hwloc_bitmap_compare_first HWLOC_NAME(bitmap_compare_first)
+#define hwloc_bitmap_compare HWLOC_NAME(bitmap_compare)
+#define hwloc_bitmap_weight HWLOC_NAME(bitmap_weight)
+
+/* hwloc/helper.h */
+
+#define hwloc_get_type_or_below_depth HWLOC_NAME(get_type_or_below_depth)
+#define hwloc_get_type_or_above_depth HWLOC_NAME(get_type_or_above_depth)
+#define hwloc_get_root_obj HWLOC_NAME(get_root_obj)
+#define hwloc_get_ancestor_obj_by_depth HWLOC_NAME(get_ancestor_obj_by_depth)
+#define hwloc_get_ancestor_obj_by_type HWLOC_NAME(get_ancestor_obj_by_type)
+#define hwloc_get_next_obj_by_depth HWLOC_NAME(get_next_obj_by_depth)
+#define hwloc_get_next_obj_by_type HWLOC_NAME(get_next_obj_by_type)
+#define hwloc_get_pu_obj_by_os_index HWLOC_NAME(get_pu_obj_by_os_index)
+#define hwloc_get_next_child HWLOC_NAME(get_next_child)
+#define hwloc_get_common_ancestor_obj HWLOC_NAME(get_common_ancestor_obj)
+#define hwloc_obj_is_in_subtree HWLOC_NAME(obj_is_in_subtree)
+#define hwloc_get_first_largest_obj_inside_cpuset HWLOC_NAME(get_first_largest_obj_inside_cpuset)
+#define hwloc_get_largest_objs_inside_cpuset HWLOC_NAME(get_largest_objs_inside_cpuset)
+#define hwloc_get_next_obj_inside_cpuset_by_depth HWLOC_NAME(get_next_obj_inside_cpuset_by_depth)
+#define hwloc_get_next_obj_inside_cpuset_by_type HWLOC_NAME(get_next_obj_inside_cpuset_by_type)
+#define hwloc_get_obj_inside_cpuset_by_depth HWLOC_NAME(get_obj_inside_cpuset_by_depth)
+#define hwloc_get_obj_inside_cpuset_by_type HWLOC_NAME(get_obj_inside_cpuset_by_type)
+#define hwloc_get_nbobjs_inside_cpuset_by_depth HWLOC_NAME(get_nbobjs_inside_cpuset_by_depth)
+#define hwloc_get_nbobjs_inside_cpuset_by_type HWLOC_NAME(get_nbobjs_inside_cpuset_by_type)
+#define hwloc_get_obj_index_inside_cpuset HWLOC_NAME(get_obj_index_inside_cpuset)
+#define hwloc_get_child_covering_cpuset HWLOC_NAME(get_child_covering_cpuset)
+#define hwloc_get_obj_covering_cpuset HWLOC_NAME(get_obj_covering_cpuset)
+#define hwloc_get_next_obj_covering_cpuset_by_depth HWLOC_NAME(get_next_obj_covering_cpuset_by_depth)
+#define hwloc_get_next_obj_covering_cpuset_by_type HWLOC_NAME(get_next_obj_covering_cpuset_by_type)
+#define hwloc_get_cache_type_depth HWLOC_NAME(get_cache_type_depth)
+#define hwloc_get_cache_covering_cpuset HWLOC_NAME(get_cache_covering_cpuset)
+#define hwloc_get_shared_cache_covering_obj HWLOC_NAME(get_shared_cache_covering_obj)
+#define hwloc_get_closest_objs HWLOC_NAME(get_closest_objs)
+#define hwloc_get_obj_below_by_type HWLOC_NAME(get_obj_below_by_type)
+#define hwloc_get_obj_below_array_by_type HWLOC_NAME(get_obj_below_array_by_type)
+#define hwloc_distributev HWLOC_NAME(distributev)
+#define hwloc_distribute HWLOC_NAME(distribute)
+#define hwloc_alloc_membind_policy HWLOC_NAME(alloc_membind_policy)
+#define hwloc_alloc_membind_policy_nodeset HWLOC_NAME(alloc_membind_policy_nodeset)
+#define hwloc_topology_get_complete_cpuset HWLOC_NAME(topology_get_complete_cpuset)
+#define hwloc_topology_get_topology_cpuset HWLOC_NAME(topology_get_topology_cpuset)
+#define hwloc_topology_get_online_cpuset HWLOC_NAME(topology_get_online_cpuset)
+#define hwloc_topology_get_allowed_cpuset HWLOC_NAME(topology_get_allowed_cpuset)
+#define hwloc_topology_get_complete_nodeset HWLOC_NAME(topology_get_complete_nodeset)
+#define hwloc_topology_get_topology_nodeset HWLOC_NAME(topology_get_topology_nodeset)
+#define hwloc_topology_get_allowed_nodeset HWLOC_NAME(topology_get_allowed_nodeset)
+#define hwloc_cpuset_to_nodeset HWLOC_NAME(cpuset_to_nodeset)
+#define hwloc_cpuset_to_nodeset_strict HWLOC_NAME(cpuset_to_nodeset_strict)
+#define hwloc_cpuset_from_nodeset HWLOC_NAME(cpuset_from_nodeset)
+#define hwloc_cpuset_from_nodeset_strict HWLOC_NAME(cpuset_from_nodeset_strict)
+#define hwloc_get_whole_distance_matrix_by_depth HWLOC_NAME(get_whole_distance_matrix_by_depth)
+#define hwloc_get_whole_distance_matrix_by_type HWLOC_NAME(get_whole_distance_matrix_by_type)
+#define hwloc_get_distance_matrix_covering_obj_by_depth HWLOC_NAME(get_distance_matrix_covering_obj_by_depth)
+#define hwloc_get_latency HWLOC_NAME(get_latency)
+
+/* diff.h */
+
+#define hwloc_topology_diff_obj_attr_type_e HWLOC_NAME(topology_diff_obj_attr_type_e)
+#define hwloc_topology_diff_obj_attr_type_t HWLOC_NAME(topology_diff_obj_attr_type_t)
+#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_SIZE HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_SIZE)
+#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_NAME HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_NAME)
+#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR_INFO)
+#define hwloc_topology_diff_obj_attr_u HWLOC_NAME(topology_diff_obj_attr_u)
+#define hwloc_topology_diff_obj_attr_generic_s HWLOC_NAME(topology_diff_obj_attr_generic_s)
+#define hwloc_topology_diff_obj_attr_uint64_s HWLOC_NAME(topology_diff_obj_attr_uint64_s)
+#define hwloc_topology_diff_obj_attr_string_s HWLOC_NAME(topology_diff_obj_attr_string_s)
+#define hwloc_topology_diff_type_e HWLOC_NAME(topology_diff_type_e)
+#define hwloc_topology_diff_type_t HWLOC_NAME(topology_diff_type_t)
+#define HWLOC_TOPOLOGY_DIFF_OBJ_ATTR HWLOC_NAME_CAPS(TOPOLOGY_DIFF_OBJ_ATTR)
+#define HWLOC_TOPOLOGY_DIFF_TOO_COMPLEX HWLOC_NAME_CAPS(TOPOLOGY_DIFF_TOO_COMPLEX)
+#define hwloc_topology_diff_u HWLOC_NAME(topology_diff_u)
+#define hwloc_topology_diff_t HWLOC_NAME(topology_diff_t)
+#define hwloc_topology_diff_generic_s HWLOC_NAME(topology_diff_generic_s)
+#define hwloc_topology_diff_obj_attr_s HWLOC_NAME(topology_diff_obj_attr_s)
+#define hwloc_topology_diff_too_complex_s HWLOC_NAME(topology_diff_too_complex_s)
+#define hwloc_topology_diff_build HWLOC_NAME(topology_diff_build)
+#define hwloc_topology_diff_apply_flags_e HWLOC_NAME(topology_diff_apply_flags_e)
+#define HWLOC_TOPOLOGY_DIFF_APPLY_REVERSE HWLOC_NAME_CAPS(TOPOLOGY_DIFF_APPLY_REVERSE)
+#define hwloc_topology_diff_apply HWLOC_NAME(topology_diff_apply)
+#define hwloc_topology_diff_destroy HWLOC_NAME(topology_diff_destroy)
+#define hwloc_topology_diff_load_xml HWLOC_NAME(topology_diff_load_xml)
+#define hwloc_topology_diff_export_xml HWLOC_NAME(topology_diff_export_xml)
+#define hwloc_topology_diff_load_xmlbuffer HWLOC_NAME(topology_diff_load_xmlbuffer)
+#define hwloc_topology_diff_export_xmlbuffer HWLOC_NAME(topology_diff_export_xmlbuffer)
+
+/* glibc-sched.h */
+
+#define hwloc_cpuset_to_glibc_sched_affinity HWLOC_NAME(cpuset_to_glibc_sched_affinity)
+#define hwloc_cpuset_from_glibc_sched_affinity HWLOC_NAME(cpuset_from_glibc_sched_affinity)
+
+/* linux-libnuma.h */
+
+#define hwloc_cpuset_to_linux_libnuma_ulongs HWLOC_NAME(cpuset_to_linux_libnuma_ulongs)
+#define hwloc_nodeset_to_linux_libnuma_ulongs HWLOC_NAME(nodeset_to_linux_libnuma_ulongs)
+#define hwloc_cpuset_from_linux_libnuma_ulongs HWLOC_NAME(cpuset_from_linux_libnuma_ulongs)
+#define hwloc_nodeset_from_linux_libnuma_ulongs HWLOC_NAME(nodeset_from_linux_libnuma_ulongs)
+#define hwloc_cpuset_to_linux_libnuma_bitmask HWLOC_NAME(cpuset_to_linux_libnuma_bitmask)
+#define hwloc_nodeset_to_linux_libnuma_bitmask HWLOC_NAME(nodeset_to_linux_libnuma_bitmask)
+#define hwloc_cpuset_from_linux_libnuma_bitmask HWLOC_NAME(cpuset_from_linux_libnuma_bitmask)
+#define hwloc_nodeset_from_linux_libnuma_bitmask HWLOC_NAME(nodeset_from_linux_libnuma_bitmask)
+
+/* linux.h */
+
+#define hwloc_linux_parse_cpumap_file HWLOC_NAME(linux_parse_cpumap_file)
+#define hwloc_linux_set_tid_cpubind HWLOC_NAME(linux_set_tid_cpubind)
+#define hwloc_linux_get_tid_cpubind HWLOC_NAME(linux_get_tid_cpubind)
+
+/* openfabrics-verbs.h */
+
+#define hwloc_ibv_get_device_cpuset HWLOC_NAME(ibv_get_device_cpuset)
+#define hwloc_ibv_get_device_osdev HWLOC_NAME(ibv_get_device_osdev)
+#define hwloc_ibv_get_device_osdev_by_name HWLOC_NAME(ibv_get_device_osdev_by_name)
+
+/* myriexpress.h */
+
+#define hwloc_mx_board_get_device_cpuset HWLOC_NAME(mx_board_get_device_cpuset)
+#define hwloc_mx_endpoint_get_device_cpuset HWLOC_NAME(mx_endpoint_get_device_cpuset)
+
+/* intel-mic.h */
+
+#define hwloc_intel_mic_get_device_cpuset HWLOC_NAME(intel_mic_get_device_cpuset)
+#define hwloc_intel_mic_get_device_osdev_by_index HWLOC_NAME(intel_mic_get_device_osdev_by_index)
+
+/* opencl.h */
+
+#define hwloc_opencl_get_device_cpuset HWLOC_NAME(opencl_get_device_cpuset)
+#define hwloc_opencl_get_device_osdev HWLOC_NAME(opencl_get_device_osdev)
+#define hwloc_opencl_get_device_osdev_by_index HWLOC_NAME(opencl_get_device_osdev_by_index)
+
+/* cuda.h */
+
+#define hwloc_cuda_get_device_pci_ids HWLOC_NAME(cuda_get_device_pci_ids)
+#define hwloc_cuda_get_device_cpuset HWLOC_NAME(cuda_get_device_cpuset)
+#define hwloc_cuda_get_device_pcidev HWLOC_NAME(cuda_get_device_pcidev)
+#define hwloc_cuda_get_device_osdev HWLOC_NAME(cuda_get_device_osdev)
+#define hwloc_cuda_get_device_osdev_by_index HWLOC_NAME(cuda_get_device_osdev_by_index)
+
+/* cudart.h */
+
+#define hwloc_cudart_get_device_pci_ids HWLOC_NAME(cudart_get_device_pci_ids)
+#define hwloc_cudart_get_device_cpuset HWLOC_NAME(cudart_get_device_cpuset)
+#define hwloc_cudart_get_device_pcidev HWLOC_NAME(cudart_get_device_pcidev)
+#define hwloc_cudart_get_device_osdev_by_index HWLOC_NAME(cudart_get_device_osdev_by_index)
+
+/* nvml.h */
+
+#define hwloc_nvml_get_device_cpuset HWLOC_NAME(nvml_get_device_cpuset)
+#define hwloc_nvml_get_device_osdev HWLOC_NAME(nvml_get_device_osdev)
+#define hwloc_nvml_get_device_osdev_by_index HWLOC_NAME(nvml_get_device_osdev_by_index)
+
+/* gl.h */
+
+#define hwloc_gl_get_display_osdev_by_port_device HWLOC_NAME(gl_get_display_osdev_by_port_device)
+#define hwloc_gl_get_display_osdev_by_name HWLOC_NAME(gl_get_display_osdev_by_name)
+#define hwloc_gl_get_display_by_osdev HWLOC_NAME(gl_get_display_by_osdev)
+
+/* hwloc/plugins.h */
+
+#define hwloc_disc_component_type_e HWLOC_NAME(disc_component_type_e)
+#define HWLOC_DISC_COMPONENT_TYPE_CPU HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_CPU)
+#define HWLOC_DISC_COMPONENT_TYPE_GLOBAL HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_GLOBAL)
+#define HWLOC_DISC_COMPONENT_TYPE_MISC HWLOC_NAME_CAPS(DISC_COMPONENT_TYPE_MISC)
+#define hwloc_disc_component_type_t HWLOC_NAME(disc_component_type_t)
+#define hwloc_disc_component HWLOC_NAME(disc_component)
+
+#define hwloc_backend HWLOC_NAME(backend)
+#define hwloc_backend_flag_e HWLOC_NAME(backend_flag_e)
+#define HWLOC_BACKEND_FLAG_NEED_LEVELS HWLOC_NAME_CAPS(BACKEND_FLAG_NEED_LEVELS)
+
+#define hwloc_backend_alloc HWLOC_NAME(backend_alloc)
+#define hwloc_backend_enable HWLOC_NAME(backend_enable)
+#define hwloc_backends_get_obj_cpuset HWLOC_NAME(backends_get_obj_cpuset)
+#define hwloc_backends_notify_new_object HWLOC_NAME(backends_notify_new_object)
+
+#define hwloc_component_type_e HWLOC_NAME(component_type_e)
+#define HWLOC_COMPONENT_TYPE_DISC HWLOC_NAME_CAPS(COMPONENT_TYPE_DISC)
+//#define HWLOC_COMPONENT_TYPE_XML HWLOC_NAME_CAPS(COMPONENT_TYPE_XML)
+#define hwloc_component_type_t HWLOC_NAME(component_type_t)
+#define hwloc_component HWLOC_NAME(component)
+
+#define hwloc_plugin_check_namespace HWLOC_NAME(plugin_check_namespace)
+
+#define hwloc_insert_object_by_cpuset HWLOC_NAME(insert_object_by_cpuset)
+#define hwloc_report_error_t HWLOC_NAME(report_error_t)
+#define hwloc_report_os_error HWLOC_NAME(report_os_error)
+#define hwloc_hide_errors HWLOC_NAME(hide_errors)
+#define hwloc__insert_object_by_cpuset HWLOC_NAME(_insert_object_by_cpuset)
+#define hwloc_insert_object_by_parent HWLOC_NAME(insert_object_by_parent)
+#define hwloc_alloc_setup_object HWLOC_NAME(alloc_setup_object)
+#define hwloc_fill_object_sets HWLOC_NAME(fill_object_sets)
+
+#define hwloc_insert_pci_device_list HWLOC_NAME(insert_pci_device_list)
+#define hwloc_pci_find_cap HWLOC_NAME(pci_find_cap)
+#define hwloc_pci_find_linkspeed HWLOC_NAME(pci_find_linkspeed)
+#define hwloc_pci_prepare_bridge HWLOC_NAME(pci_prepare_bridge)
+
+/* hwloc/deprecated.h */
+
+#define hwloc_obj_snprintf HWLOC_NAME(obj_snprintf)
+
+/* private/debug.h */
+
+#define hwloc_debug HWLOC_NAME(debug)
+
+/* private/misc.h */
+
+#define hwloc_snprintf HWLOC_NAME(snprintf)
+#define hwloc_namecoloncmp HWLOC_NAME(namecoloncmp)
+#define hwloc_ffsl_manual HWLOC_NAME(ffsl_manual)
+#define hwloc_ffs32 HWLOC_NAME(ffs32)
+#define hwloc_ffsl_from_ffs32 HWLOC_NAME(ffsl_from_ffs32)
+#define hwloc_flsl_manual HWLOC_NAME(flsl_manual)
+#define hwloc_fls32 HWLOC_NAME(fls32)
+#define hwloc_flsl_from_fls32 HWLOC_NAME(flsl_from_fls32)
+#define hwloc_weight_long HWLOC_NAME(weight_long)
+
+/* private/cpuid.h */
+
+#define hwloc_have_cpuid HWLOC_NAME(have_cpuid)
+#define hwloc_cpuid HWLOC_NAME(cpuid)
+
+/* private/xml.h */
+
+#define hwloc__xml_verbose HWLOC_NAME(_xml_verbose)
+
+#define hwloc__xml_import_state_s HWLOC_NAME(_xml_import_state_s)
+#define hwloc__xml_import_state_t HWLOC_NAME(_xml_import_state_t)
+#define hwloc__xml_import_diff HWLOC_NAME(_xml_import_diff)
+#define hwloc_xml_backend_data_s HWLOC_NAME(xml_backend_data_s)
+#define hwloc__xml_export_state_s HWLOC_NAME(_xml_export_state_s)
+#define hwloc__xml_export_state_t HWLOC_NAME(_xml_export_state_t)
+#define hwloc__xml_export_object HWLOC_NAME(_xml_export_object)
+#define hwloc__xml_export_diff HWLOC_NAME(_xml_export_diff)
+
+#define hwloc_xml_callbacks HWLOC_NAME(xml_callbacks)
+#define hwloc_xml_component HWLOC_NAME(xml_component)
+#define hwloc_xml_callbacks_register HWLOC_NAME(xml_callbacks_register)
+#define hwloc_xml_callbacks_reset HWLOC_NAME(xml_callbacks_reset)
+
+/* private/components.h */
+
+#define hwloc_disc_component_force_enable HWLOC_NAME(disc_component_force_enable)
+#define hwloc_disc_components_enable_others HWLOC_NAME(disc_components_instantiate_others)
+
+#define hwloc_backends_disable_all HWLOC_NAME(backends_disable_all)
+#define hwloc_backends_is_thissystem HWLOC_NAME(backends_is_thissystem)
+
+#define hwloc_components_init HWLOC_NAME(components_init)
+#define hwloc_components_destroy_all HWLOC_NAME(components_destroy_all)
+
+/* private/private.h */
+
+#define hwloc_ignore_type_e HWLOC_NAME(ignore_type_e)
+
+#define HWLOC_IGNORE_TYPE_NEVER HWLOC_NAME_CAPS(IGNORE_TYPE_NEVER)
+#define HWLOC_IGNORE_TYPE_KEEP_STRUCTURE HWLOC_NAME_CAPS(IGNORE_TYPE_KEEP_STRUCTURE)
+#define HWLOC_IGNORE_TYPE_ALWAYS HWLOC_NAME_CAPS(IGNORE_TYPE_ALWAYS)
+
+#define hwloc_os_distances_s HWLOC_NAME(os_distances_s)
+
+#define hwloc_xml_imported_distances_s HWLOC_NAME(xml_imported_distances_s)
+
+#define hwloc_alloc_obj_cpusets HWLOC_NAME(alloc_obj_cpusets)
+#define hwloc_setup_pu_level HWLOC_NAME(setup_pu_level)
+#define hwloc_get_sysctlbyname HWLOC_NAME(get_sysctlbyname)
+#define hwloc_get_sysctl HWLOC_NAME(get_sysctl)
+#define hwloc_fallback_nbprocessors HWLOC_NAME(fallback_nbprocessors)
+#define hwloc_connect_children HWLOC_NAME(connect_children)
+#define hwloc_connect_levels HWLOC_NAME(connect_levels)
+
+#define hwloc_topology_setup_defaults HWLOC_NAME(topology_setup_defaults)
+#define hwloc_topology_clear HWLOC_NAME(topology_clear)
+
+#define hwloc_binding_hooks HWLOC_NAME(binding_hooks)
+#define hwloc_set_native_binding_hooks HWLOC_NAME(set_native_binding_hooks)
+#define hwloc_set_binding_hooks HWLOC_NAME(set_binding_hooks)
+
+#define hwloc_set_linuxfs_hooks HWLOC_NAME(set_linuxfs_hooks)
+#define hwloc_set_bgq_hooks HWLOC_NAME(set_bgq_hooks)
+#define hwloc_set_solaris_hooks HWLOC_NAME(set_solaris_hooks)
+#define hwloc_set_aix_hooks HWLOC_NAME(set_aix_hooks)
+#define hwloc_set_osf_hooks HWLOC_NAME(set_osf_hooks)
+#define hwloc_set_windows_hooks HWLOC_NAME(set_windows_hooks)
+#define hwloc_set_darwin_hooks HWLOC_NAME(set_darwin_hooks)
+#define hwloc_set_freebsd_hooks HWLOC_NAME(set_freebsd_hooks)
+#define hwloc_set_netbsd_hooks HWLOC_NAME(set_netbsd_hooks)
+#define hwloc_set_hpux_hooks HWLOC_NAME(set_hpux_hooks)
+
+#define hwloc_add_uname_info HWLOC_NAME(add_uname_info)
+#define hwloc_free_unlinked_object HWLOC_NAME(free_unlinked_object)
+#define hwloc__duplicate_objects HWLOC_NAME(_duplicate_objects)
+
+#define hwloc_alloc_heap HWLOC_NAME(alloc_heap)
+#define hwloc_alloc_mmap HWLOC_NAME(alloc_mmap)
+#define hwloc_free_heap HWLOC_NAME(free_heap)
+#define hwloc_free_mmap HWLOC_NAME(free_mmap)
+#define hwloc_alloc_or_fail HWLOC_NAME(alloc_or_fail)
+
+#define hwloc_distances_init HWLOC_NAME(distances_init)
+#define hwloc_distances_destroy HWLOC_NAME(distances_destroy)
+#define hwloc_distances_set HWLOC_NAME(distances_set)
+#define hwloc_distances_set_from_env HWLOC_NAME(distances_set_from_env)
+#define hwloc_distances_restrict_os HWLOC_NAME(distances_restrict_os)
+#define hwloc_distances_restrict HWLOC_NAME(distances_restrict)
+#define hwloc_distances_finalize_os HWLOC_NAME(distances_finalize_os)
+#define hwloc_distances_finalize_logical HWLOC_NAME(distances_finalize_logical)
+#define hwloc_clear_object_distances HWLOC_NAME(clear_object_distances)
+#define hwloc_clear_object_distances_one HWLOC_NAME(clear_object_distances_one)
+#define hwloc_group_by_distances HWLOC_NAME(group_by_distances)
+
+#define hwloc_encode_to_base64 HWLOC_NAME(encode_to_base64)
+#define hwloc_decode_from_base64 HWLOC_NAME(decode_from_base64)
+
+#define hwloc_obj_add_info_nodup HWLOC_NAME(obj_add_info_nodup)
+
+/* private/solaris-chiptype.h */
+
+#define hwloc_solaris_get_chip_type HWLOC_NAME(solaris_get_chip_type)
+#define hwloc_solaris_get_chip_model HWLOC_NAME(solaris_get_chip_model)
+
+#endif /* HWLOC_SYM_TRANSFORM */
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+
+#endif /* HWLOC_RENAME_H */
diff --git a/ext/hwloc/include/numa.h b/ext/hwloc/include/numa.h
new file mode 100644
index 000000000..1dbc13728
--- /dev/null
+++ b/ext/hwloc/include/numa.h
@@ -0,0 +1,468 @@
+/* Copyright (C) 2003,2004 Andi Kleen, SuSE Labs.
+
+ libnuma is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; version
+ 2.1.
+
+ libnuma is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should find a copy of v2.1 of the GNU Lesser General Public License
+ somewhere on your Linux system; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _NUMA_H
+#define _NUMA_H 1
+
+/* allow an application to test for the current programming interface: */
+#define LIBNUMA_API_VERSION 2
+
+/* Simple NUMA policy library */
+
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+#include <stdlib.h>
+
+#if defined(__x86_64__) || defined(__i386__)
+#define NUMA_NUM_NODES 128
+#else
+#define NUMA_NUM_NODES 2048
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+ unsigned long n[NUMA_NUM_NODES/(sizeof(unsigned long)*8)];
+} nodemask_t;
+
+struct bitmask {
+ unsigned long size; /* number of bits in the map */
+ unsigned long *maskp;
+};
+
+/* operations on struct bitmask */
+int numa_bitmask_isbitset(const struct bitmask *, unsigned int);
+struct bitmask *numa_bitmask_setall(struct bitmask *);
+struct bitmask *numa_bitmask_clearall(struct bitmask *);
+struct bitmask *numa_bitmask_setbit(struct bitmask *, unsigned int);
+struct bitmask *numa_bitmask_clearbit(struct bitmask *, unsigned int);
+unsigned int numa_bitmask_nbytes(struct bitmask *);
+struct bitmask *numa_bitmask_alloc(unsigned int);
+void numa_bitmask_free(struct bitmask *);
+int numa_bitmask_equal(const struct bitmask *, const struct bitmask *);
+void copy_nodemask_to_bitmask(nodemask_t *, struct bitmask *);
+void copy_bitmask_to_nodemask(struct bitmask *, nodemask_t *);
+void copy_bitmask_to_bitmask(struct bitmask *, struct bitmask *);
+
+/* compatibility for codes that used them: */
+
+static inline void nodemask_zero(nodemask_t *mask)
+{
+ struct bitmask tmp;
+
+ tmp.maskp = (unsigned long *)mask;
+ tmp.size = sizeof(nodemask_t) * 8;
+ numa_bitmask_clearall(&tmp);
+}
+
+static inline void nodemask_zero_compat(nodemask_t *mask)
+{
+ struct bitmask tmp;
+
+ tmp.maskp = (unsigned long *)mask;
+ tmp.size = sizeof(nodemask_t) * 8;
+ numa_bitmask_clearall(&tmp);
+}
+
+static inline void nodemask_set_compat(nodemask_t *mask, int node)
+{
+ mask->n[node / (8*sizeof(unsigned long))] |=
+ (1UL<<(node%(8*sizeof(unsigned long))));
+}
+
+static inline void nodemask_clr_compat(nodemask_t *mask, int node)
+{
+ mask->n[node / (8*sizeof(unsigned long))] &=
+ ~(1UL<<(node%(8*sizeof(unsigned long))));
+}
+
+static inline int nodemask_isset_compat(const nodemask_t *mask, int node)
+{
+ if ((unsigned)node >= NUMA_NUM_NODES)
+ return 0;
+ if (mask->n[node / (8*sizeof(unsigned long))] &
+ (1UL<<(node%(8*sizeof(unsigned long)))))
+ return 1;
+ return 0;
+}
+
+static inline int nodemask_equal(const nodemask_t *a, const nodemask_t *b)
+{
+ struct bitmask tmp_a, tmp_b;
+
+ tmp_a.maskp = (unsigned long *)a;
+ tmp_a.size = sizeof(nodemask_t) * 8;
+
+ tmp_b.maskp = (unsigned long *)b;
+ tmp_b.size = sizeof(nodemask_t) * 8;
+
+ return numa_bitmask_equal(&tmp_a, &tmp_b);
+}
+
+static inline int nodemask_equal_compat(const nodemask_t *a, const nodemask_t *b)
+{
+ struct bitmask tmp_a, tmp_b;
+
+ tmp_a.maskp = (unsigned long *)a;
+ tmp_a.size = sizeof(nodemask_t) * 8;
+
+ tmp_b.maskp = (unsigned long *)b;
+ tmp_b.size = sizeof(nodemask_t) * 8;
+
+ return numa_bitmask_equal(&tmp_a, &tmp_b);
+}
+
+/* NUMA support available. If this returns a negative value all other function
+ in this library are undefined. */
+int numa_available(void);
+
+/* Basic NUMA state */
+
+/* Get max available node */
+int numa_max_node(void);
+int numa_max_possible_node(void);
+/* Return preferred node */
+int numa_preferred(void);
+
+/* Return node size and free memory */
+long long numa_node_size64(int node, long long *freep);
+long numa_node_size(int node, long *freep);
+
+int numa_pagesize(void);
+
+/* Set with all nodes from which the calling process may allocate memory.
+ Only valid after numa_available. */
+extern struct bitmask *numa_all_nodes_ptr;
+
+/* Set with all nodes the kernel has exposed to userspace */
+extern struct bitmask *numa_nodes_ptr;
+
+/* For source compatibility */
+extern nodemask_t numa_all_nodes;
+
+/* Set with all cpus. */
+extern struct bitmask *numa_all_cpus_ptr;
+
+/* Set with no nodes */
+extern struct bitmask *numa_no_nodes_ptr;
+
+/* Source compatibility */
+extern nodemask_t numa_no_nodes;
+
+/* Only run and allocate memory from a specific set of nodes. */
+void numa_bind(struct bitmask *nodes);
+
+/* Set the NUMA node interleaving mask. 0 to turn off interleaving */
+void numa_set_interleave_mask(struct bitmask *nodemask);
+
+/* Return the current interleaving mask */
+struct bitmask *numa_get_interleave_mask(void);
+
+/* allocate a bitmask big enough for all nodes */
+struct bitmask *numa_allocate_nodemask(void);
+
+static inline void numa_free_nodemask(struct bitmask *b)
+{
+ numa_bitmask_free(b);
+}
+
+/* Some node to preferably allocate memory from for task. */
+void numa_set_preferred(int node);
+
+/* Set local memory allocation policy for task */
+void numa_set_localalloc(void);
+
+/* Only allocate memory from the nodes set in mask. 0 to turn off */
+void numa_set_membind(struct bitmask *nodemask);
+
+/* Return current membind */
+struct bitmask *numa_get_membind(void);
+
+/* Return allowed memories [nodes] */
+struct bitmask *numa_get_mems_allowed(void);
+
+int numa_get_interleave_node(void);
+
+/* NUMA memory allocation. These functions always round to page size
+ and are relatively slow. */
+
+/* Alloc memory page interleaved on nodes in mask */
+void *numa_alloc_interleaved_subset(size_t size, struct bitmask *nodemask);
+/* Alloc memory page interleaved on all nodes. */
+void *numa_alloc_interleaved(size_t size);
+/* Alloc memory located on node */
+void *numa_alloc_onnode(size_t size, int node);
+/* Alloc memory on local node */
+void *numa_alloc_local(size_t size);
+/* Allocation with current policy */
+void *numa_alloc(size_t size);
+/* Change the size of a memory area preserving the memory policy */
+void *numa_realloc(void *old_addr, size_t old_size, size_t new_size);
+/* Free memory allocated by the functions above */
+void numa_free(void *mem, size_t size);
+
+/* Low level functions, primarily for shared memory. All memory
+ processed by these must not be touched yet */
+
+/* Interleave an memory area. */
+void numa_interleave_memory(void *mem, size_t size, struct bitmask *mask);
+
+/* Allocate a memory area on a specific node. */
+void numa_tonode_memory(void *start, size_t size, int node);
+
+/* Allocate memory on a mask of nodes. */
+void numa_tonodemask_memory(void *mem, size_t size, struct bitmask *mask);
+
+/* Allocate a memory area on the current node. */
+void numa_setlocal_memory(void *start, size_t size);
+
+/* Allocate memory area with current memory policy */
+void numa_police_memory(void *start, size_t size);
+
+/* Run current task only on nodes in mask */
+int numa_run_on_node_mask(struct bitmask *mask);
+/* Run current task only on node */
+int numa_run_on_node(int node);
+/* Return current mask of nodes the task can run on */
+struct bitmask * numa_get_run_node_mask(void);
+
+/* When strict fail allocation when memory cannot be allocated in target node(s). */
+void numa_set_bind_policy(int strict);
+
+/* Fail when existing memory has incompatible policy */
+void numa_set_strict(int flag);
+
+/* maximum nodes (size of kernel nodemask_t) */
+int numa_num_possible_nodes();
+
+/* maximum cpus (size of kernel cpumask_t) */
+int numa_num_possible_cpus();
+
+/* nodes in the system */
+int numa_num_configured_nodes();
+
+/* maximum cpus */
+int numa_num_configured_cpus();
+
+/* maximum cpus allowed to current task */
+int numa_num_task_cpus();
+int numa_num_thread_cpus(); /* backward compatibility */
+
+/* maximum nodes allowed to current task */
+int numa_num_task_nodes();
+int numa_num_thread_nodes(); /* backward compatibility */
+
+/* allocate a bitmask the size of the kernel cpumask_t */
+struct bitmask *numa_allocate_cpumask();
+
+static inline void numa_free_cpumask(struct bitmask *b)
+{
+ numa_bitmask_free(b);
+}
+
+/* Convert node to CPU mask. -1/errno on failure, otherwise 0. */
+int numa_node_to_cpus(int, struct bitmask *);
+
+/* report the node of the specified cpu. -1/errno on invalid cpu. */
+int numa_node_of_cpu(int cpu);
+
+/* Report distance of node1 from node2. 0 on error.*/
+int numa_distance(int node1, int node2);
+
+/* Error handling. */
+/* This is an internal function in libnuma that can be overwritten by an user
+ program. Default is to print an error to stderr and exit if numa_exit_on_error
+ is true. */
+void numa_error(char *where);
+
+/* When true exit the program when a NUMA system call (except numa_available)
+ fails */
+extern int numa_exit_on_error;
+/* Warning function. Can also be overwritten. Default is to print on stderr
+ once. */
+void numa_warn(int num, char *fmt, ...);
+
+/* When true exit the program on a numa_warn() call */
+extern int numa_exit_on_warn;
+
+int numa_migrate_pages(int pid, struct bitmask *from, struct bitmask *to);
+
+int numa_move_pages(int pid, unsigned long count, void **pages,
+ const int *nodes, int *status, int flags);
+
+int numa_sched_getaffinity(pid_t, struct bitmask *);
+int numa_sched_setaffinity(pid_t, struct bitmask *);
+
+/* Convert an ascii list of nodes to a bitmask */
+struct bitmask *numa_parse_nodestring(char *);
+
+/* Convert an ascii list of cpu to a bitmask */
+struct bitmask *numa_parse_cpustring(char *);
+
+/*
+ * The following functions are for source code compatibility
+ * with releases prior to version 2.
+ * Such codes should be compiled with NUMA_VERSION1_COMPATIBILITY defined.
+ */
+
+static inline void numa_set_interleave_mask_compat(nodemask_t *nodemask)
+{
+ struct bitmask tmp;
+
+ tmp.maskp = (unsigned long *)nodemask;
+ tmp.size = sizeof(nodemask_t) * 8;
+ numa_set_interleave_mask(&tmp);
+}
+
+static inline nodemask_t numa_get_interleave_mask_compat()
+{
+ struct bitmask *tp;
+ nodemask_t mask;
+
+ tp = numa_get_interleave_mask();
+ copy_bitmask_to_nodemask(tp, &mask);
+ numa_bitmask_free(tp);
+ return mask;
+}
+
+static inline void numa_bind_compat(nodemask_t *mask)
+{
+ struct bitmask *tp;
+
+ tp = numa_allocate_nodemask();
+ copy_nodemask_to_bitmask(mask, tp);
+ numa_bind(tp);
+ numa_bitmask_free(tp);
+}
+
+static inline void numa_set_membind_compat(nodemask_t *mask)
+{
+ struct bitmask tmp;
+
+ tmp.maskp = (unsigned long *)mask;
+ tmp.size = sizeof(nodemask_t) * 8;
+ numa_set_membind(&tmp);
+}
+
+static inline nodemask_t numa_get_membind_compat()
+{
+ struct bitmask *tp;
+ nodemask_t mask;
+
+ tp = numa_get_membind();
+ copy_bitmask_to_nodemask(tp, &mask);
+ numa_bitmask_free(tp);
+ return mask;
+}
+
+static inline void *numa_alloc_interleaved_subset_compat(size_t size,
+ const nodemask_t *mask)
+{
+ struct bitmask tmp;
+
+ tmp.maskp = (unsigned long *)mask;
+ tmp.size = sizeof(nodemask_t) * 8;
+ return numa_alloc_interleaved_subset(size, &tmp);
+}
+
+static inline int numa_run_on_node_mask_compat(const nodemask_t *mask)
+{
+ struct bitmask tmp;
+
+ tmp.maskp = (unsigned long *)mask;
+ tmp.size = sizeof(nodemask_t) * 8;
+ return numa_run_on_node_mask(&tmp);
+}
+
+static inline nodemask_t numa_get_run_node_mask_compat()
+{
+ struct bitmask *tp;
+ nodemask_t mask;
+
+ tp = numa_get_run_node_mask();
+ copy_bitmask_to_nodemask(tp, &mask);
+ numa_bitmask_free(tp);
+ return mask;
+}
+
+static inline void numa_interleave_memory_compat(void *mem, size_t size,
+ const nodemask_t *mask)
+{
+ struct bitmask tmp;
+
+ tmp.maskp = (unsigned long *)mask;
+ tmp.size = sizeof(nodemask_t) * 8;
+ numa_interleave_memory(mem, size, &tmp);
+}
+
+static inline void numa_tonodemask_memory_compat(void *mem, size_t size,
+ const nodemask_t *mask)
+{
+ struct bitmask tmp;
+
+ tmp.maskp = (unsigned long *)mask;
+ tmp.size = sizeof(nodemask_t) * 8;
+ numa_tonodemask_memory(mem, size, &tmp);
+}
+
+static inline int numa_sched_getaffinity_compat(pid_t pid, unsigned len,
+ unsigned long *mask)
+{
+ struct bitmask tmp;
+
+ tmp.maskp = (unsigned long *)mask;
+ tmp.size = len * 8;
+ return numa_sched_getaffinity(pid, &tmp);
+}
+
+static inline int numa_sched_setaffinity_compat(pid_t pid, unsigned len,
+ unsigned long *mask)
+{
+ struct bitmask tmp;
+
+ tmp.maskp = (unsigned long *)mask;
+ tmp.size = len * 8;
+ return numa_sched_setaffinity(pid, &tmp);
+}
+
+static inline int numa_node_to_cpus_compat(int node, unsigned long *buffer,
+ int buffer_len)
+{
+ struct bitmask tmp;
+
+ tmp.maskp = (unsigned long *)buffer;
+ tmp.size = buffer_len * 8;
+ return numa_node_to_cpus(node, &tmp);
+}
+
+/* end of version 1 compatibility functions */
+
+/*
+ * To compile an application that uses libnuma version 1:
+ * add -DNUMA_VERSION1_COMPATIBILITY to your Makefile's CFLAGS
+ */
+#ifdef NUMA_VERSION1_COMPATIBILITY
+#include <numacompat1.h>
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/ext/hwloc/include/pci/config.h b/ext/hwloc/include/pci/config.h
new file mode 100644
index 000000000..1f05b1241
--- /dev/null
+++ b/ext/hwloc/include/pci/config.h
@@ -0,0 +1,18 @@
+#define PCI_CONFIG_H
+#define PCI_ARCH_X86_64
+#define PCI_OS_LINUX
+#define PCI_HAVE_PM_LINUX_SYSFS
+#define PCI_HAVE_PM_LINUX_PROC
+#define PCI_HAVE_LINUX_BYTEORDER_H
+#define PCI_PATH_PROC_BUS_PCI "/proc/bus/pci"
+#define PCI_PATH_SYS_BUS_PCI "/sys/bus/pci"
+#define PCI_HAVE_PM_INTEL_CONF
+#define PCI_HAVE_64BIT_ADDRESS
+#define PCI_HAVE_PM_DUMP
+#define PCI_COMPRESSED_IDS
+#define PCI_IDS "pci.ids.gz"
+#define PCI_PATH_IDS_DIR "/usr/share/misc"
+#define PCI_USE_DNS
+#define PCI_ID_DOMAIN "pci.id.ucw.cz"
+#define PCI_SHARED_LIB
+#define PCILIB_VERSION "3.1.8"
diff --git a/ext/hwloc/include/pci/header.h b/ext/hwloc/include/pci/header.h
new file mode 100644
index 000000000..d481f2769
--- /dev/null
+++ b/ext/hwloc/include/pci/header.h
@@ -0,0 +1,1195 @@
+/*
+ * The PCI Library -- PCI Header Structure (based on <linux/pci.h>)
+ *
+ * Copyright (c) 1997--2010 Martin Mares <mj@ucw.cz>
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+/*
+ * Under PCI, each device has 256 bytes of configuration address space,
+ * of which the first 64 bytes are standardized as follows:
+ */
+#define PCI_VENDOR_ID 0x00 /* 16 bits */
+#define PCI_DEVICE_ID 0x02 /* 16 bits */
+#define PCI_COMMAND 0x04 /* 16 bits */
+#define PCI_COMMAND_IO 0x1 /* Enable response in I/O space */
+#define PCI_COMMAND_MEMORY 0x2 /* Enable response in Memory space */
+#define PCI_COMMAND_MASTER 0x4 /* Enable bus mastering */
+#define PCI_COMMAND_SPECIAL 0x8 /* Enable response to special cycles */
+#define PCI_COMMAND_INVALIDATE 0x10 /* Use memory write and invalidate */
+#define PCI_COMMAND_VGA_PALETTE 0x20 /* Enable palette snooping */
+#define PCI_COMMAND_PARITY 0x40 /* Enable parity checking */
+#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */
+#define PCI_COMMAND_SERR 0x100 /* Enable SERR */
+#define PCI_COMMAND_FAST_BACK 0x200 /* Enable back-to-back writes */
+#define PCI_COMMAND_DISABLE_INTx 0x400 /* PCIE: Disable INTx interrupts */
+
+#define PCI_STATUS 0x06 /* 16 bits */
+#define PCI_STATUS_INTx 0x08 /* PCIE: INTx interrupt pending */
+#define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */
+#define PCI_STATUS_66MHZ 0x20 /* Support 66 Mhz PCI 2.1 bus */
+#define PCI_STATUS_UDF 0x40 /* Support User Definable Features [obsolete] */
+#define PCI_STATUS_FAST_BACK 0x80 /* Accept fast-back to back */
+#define PCI_STATUS_PARITY 0x100 /* Detected parity error */
+#define PCI_STATUS_DEVSEL_MASK 0x600 /* DEVSEL timing */
+#define PCI_STATUS_DEVSEL_FAST 0x000
+#define PCI_STATUS_DEVSEL_MEDIUM 0x200
+#define PCI_STATUS_DEVSEL_SLOW 0x400
+#define PCI_STATUS_SIG_TARGET_ABORT 0x800 /* Set on target abort */
+#define PCI_STATUS_REC_TARGET_ABORT 0x1000 /* Master ack of " */
+#define PCI_STATUS_REC_MASTER_ABORT 0x2000 /* Set on master abort */
+#define PCI_STATUS_SIG_SYSTEM_ERROR 0x4000 /* Set when we drive SERR */
+#define PCI_STATUS_DETECTED_PARITY 0x8000 /* Set on parity error */
+
+#define PCI_CLASS_REVISION 0x08 /* High 24 bits are class, low 8
+ revision */
+#define PCI_REVISION_ID 0x08 /* Revision ID */
+#define PCI_CLASS_PROG 0x09 /* Reg. Level Programming Interface */
+#define PCI_CLASS_DEVICE 0x0a /* Device class */
+
+#define PCI_CACHE_LINE_SIZE 0x0c /* 8 bits */
+#define PCI_LATENCY_TIMER 0x0d /* 8 bits */
+#define PCI_HEADER_TYPE 0x0e /* 8 bits */
+#define PCI_HEADER_TYPE_NORMAL 0
+#define PCI_HEADER_TYPE_BRIDGE 1
+#define PCI_HEADER_TYPE_CARDBUS 2
+
+#define PCI_BIST 0x0f /* 8 bits */
+#define PCI_BIST_CODE_MASK 0x0f /* Return result */
+#define PCI_BIST_START 0x40 /* 1 to start BIST, 2 secs or less */
+#define PCI_BIST_CAPABLE 0x80 /* 1 if BIST capable */
+
+/*
+ * Base addresses specify locations in memory or I/O space.
+ * Decoded size can be determined by writing a value of
+ * 0xffffffff to the register, and reading it back. Only
+ * 1 bits are decoded.
+ */
+#define PCI_BASE_ADDRESS_0 0x10 /* 32 bits */
+#define PCI_BASE_ADDRESS_1 0x14 /* 32 bits [htype 0,1 only] */
+#define PCI_BASE_ADDRESS_2 0x18 /* 32 bits [htype 0 only] */
+#define PCI_BASE_ADDRESS_3 0x1c /* 32 bits */
+#define PCI_BASE_ADDRESS_4 0x20 /* 32 bits */
+#define PCI_BASE_ADDRESS_5 0x24 /* 32 bits */
+#define PCI_BASE_ADDRESS_SPACE 0x01 /* 0 = memory, 1 = I/O */
+#define PCI_BASE_ADDRESS_SPACE_IO 0x01
+#define PCI_BASE_ADDRESS_SPACE_MEMORY 0x00
+#define PCI_BASE_ADDRESS_MEM_TYPE_MASK 0x06
+#define PCI_BASE_ADDRESS_MEM_TYPE_32 0x00 /* 32 bit address */
+#define PCI_BASE_ADDRESS_MEM_TYPE_1M 0x02 /* Below 1M [obsolete] */
+#define PCI_BASE_ADDRESS_MEM_TYPE_64 0x04 /* 64 bit address */
+#define PCI_BASE_ADDRESS_MEM_PREFETCH 0x08 /* prefetchable? */
+#define PCI_BASE_ADDRESS_MEM_MASK (~(pciaddr_t)0x0f)
+#define PCI_BASE_ADDRESS_IO_MASK (~(pciaddr_t)0x03)
+/* bit 1 is reserved if address_space = 1 */
+
+/* Header type 0 (normal devices) */
+#define PCI_CARDBUS_CIS 0x28
+#define PCI_SUBSYSTEM_VENDOR_ID 0x2c
+#define PCI_SUBSYSTEM_ID 0x2e
+#define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */
+#define PCI_ROM_ADDRESS_ENABLE 0x01
+#define PCI_ROM_ADDRESS_MASK (~(pciaddr_t)0x7ff)
+
+#define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */
+
+/* 0x35-0x3b are reserved */
+#define PCI_INTERRUPT_LINE 0x3c /* 8 bits */
+#define PCI_INTERRUPT_PIN 0x3d /* 8 bits */
+#define PCI_MIN_GNT 0x3e /* 8 bits */
+#define PCI_MAX_LAT 0x3f /* 8 bits */
+
+/* Header type 1 (PCI-to-PCI bridges) */
+#define PCI_PRIMARY_BUS 0x18 /* Primary bus number */
+#define PCI_SECONDARY_BUS 0x19 /* Secondary bus number */
+#define PCI_SUBORDINATE_BUS 0x1a /* Highest bus number behind the bridge */
+#define PCI_SEC_LATENCY_TIMER 0x1b /* Latency timer for secondary interface */
+#define PCI_IO_BASE 0x1c /* I/O range behind the bridge */
+#define PCI_IO_LIMIT 0x1d
+#define PCI_IO_RANGE_TYPE_MASK 0x0f /* I/O bridging type */
+#define PCI_IO_RANGE_TYPE_16 0x00
+#define PCI_IO_RANGE_TYPE_32 0x01
+#define PCI_IO_RANGE_MASK ~0x0f
+#define PCI_SEC_STATUS 0x1e /* Secondary status register */
+#define PCI_MEMORY_BASE 0x20 /* Memory range behind */
+#define PCI_MEMORY_LIMIT 0x22
+#define PCI_MEMORY_RANGE_TYPE_MASK 0x0f
+#define PCI_MEMORY_RANGE_MASK ~0x0f
+#define PCI_PREF_MEMORY_BASE 0x24 /* Prefetchable memory range behind */
+#define PCI_PREF_MEMORY_LIMIT 0x26
+#define PCI_PREF_RANGE_TYPE_MASK 0x0f
+#define PCI_PREF_RANGE_TYPE_32 0x00
+#define PCI_PREF_RANGE_TYPE_64 0x01
+#define PCI_PREF_RANGE_MASK ~0x0f
+#define PCI_PREF_BASE_UPPER32 0x28 /* Upper half of prefetchable memory range */
+#define PCI_PREF_LIMIT_UPPER32 0x2c
+#define PCI_IO_BASE_UPPER16 0x30 /* Upper half of I/O addresses */
+#define PCI_IO_LIMIT_UPPER16 0x32
+/* 0x34 same as for htype 0 */
+/* 0x35-0x3b is reserved */
+#define PCI_ROM_ADDRESS1 0x38 /* Same as PCI_ROM_ADDRESS, but for htype 1 */
+/* 0x3c-0x3d are same as for htype 0 */
+#define PCI_BRIDGE_CONTROL 0x3e
+#define PCI_BRIDGE_CTL_PARITY 0x01 /* Enable parity detection on secondary interface */
+#define PCI_BRIDGE_CTL_SERR 0x02 /* The same for SERR forwarding */
+#define PCI_BRIDGE_CTL_NO_ISA 0x04 /* Disable bridging of ISA ports */
+#define PCI_BRIDGE_CTL_VGA 0x08 /* Forward VGA addresses */
+#define PCI_BRIDGE_CTL_MASTER_ABORT 0x20 /* Report master aborts */
+#define PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */
+#define PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */
+#define PCI_BRIDGE_CTL_PRI_DISCARD_TIMER 0x100 /* PCI-X? */
+#define PCI_BRIDGE_CTL_SEC_DISCARD_TIMER 0x200 /* PCI-X? */
+#define PCI_BRIDGE_CTL_DISCARD_TIMER_STATUS 0x400 /* PCI-X? */
+#define PCI_BRIDGE_CTL_DISCARD_TIMER_SERR_EN 0x800 /* PCI-X? */
+
+/* Header type 2 (CardBus bridges) */
+/* 0x14-0x15 reserved */
+#define PCI_CB_SEC_STATUS 0x16 /* Secondary status */
+#define PCI_CB_PRIMARY_BUS 0x18 /* PCI bus number */
+#define PCI_CB_CARD_BUS 0x19 /* CardBus bus number */
+#define PCI_CB_SUBORDINATE_BUS 0x1a /* Subordinate bus number */
+#define PCI_CB_LATENCY_TIMER 0x1b /* CardBus latency timer */
+#define PCI_CB_MEMORY_BASE_0 0x1c
+#define PCI_CB_MEMORY_LIMIT_0 0x20
+#define PCI_CB_MEMORY_BASE_1 0x24
+#define PCI_CB_MEMORY_LIMIT_1 0x28
+#define PCI_CB_IO_BASE_0 0x2c
+#define PCI_CB_IO_BASE_0_HI 0x2e
+#define PCI_CB_IO_LIMIT_0 0x30
+#define PCI_CB_IO_LIMIT_0_HI 0x32
+#define PCI_CB_IO_BASE_1 0x34
+#define PCI_CB_IO_BASE_1_HI 0x36
+#define PCI_CB_IO_LIMIT_1 0x38
+#define PCI_CB_IO_LIMIT_1_HI 0x3a
+#define PCI_CB_IO_RANGE_MASK ~0x03
+/* 0x3c-0x3d are same as for htype 0 */
+#define PCI_CB_BRIDGE_CONTROL 0x3e
+#define PCI_CB_BRIDGE_CTL_PARITY 0x01 /* Similar to standard bridge control register */
+#define PCI_CB_BRIDGE_CTL_SERR 0x02
+#define PCI_CB_BRIDGE_CTL_ISA 0x04
+#define PCI_CB_BRIDGE_CTL_VGA 0x08
+#define PCI_CB_BRIDGE_CTL_MASTER_ABORT 0x20
+#define PCI_CB_BRIDGE_CTL_CB_RESET 0x40 /* CardBus reset */
+#define PCI_CB_BRIDGE_CTL_16BIT_INT 0x80 /* Enable interrupt for 16-bit cards */
+#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM0 0x100 /* Prefetch enable for both memory regions */
+#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM1 0x200
+#define PCI_CB_BRIDGE_CTL_POST_WRITES 0x400
+#define PCI_CB_SUBSYSTEM_VENDOR_ID 0x40
+#define PCI_CB_SUBSYSTEM_ID 0x42
+#define PCI_CB_LEGACY_MODE_BASE 0x44 /* 16-bit PC Card legacy mode base address (ExCa) */
+/* 0x48-0x7f reserved */
+
+/* Capability lists */
+
+#define PCI_CAP_LIST_ID 0 /* Capability ID */
+#define PCI_CAP_ID_PM 0x01 /* Power Management */
+#define PCI_CAP_ID_AGP 0x02 /* Accelerated Graphics Port */
+#define PCI_CAP_ID_VPD 0x03 /* Vital Product Data */
+#define PCI_CAP_ID_SLOTID 0x04 /* Slot Identification */
+#define PCI_CAP_ID_MSI 0x05 /* Message Signaled Interrupts */
+#define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */
+#define PCI_CAP_ID_PCIX 0x07 /* PCI-X */
+#define PCI_CAP_ID_HT 0x08 /* HyperTransport */
+#define PCI_CAP_ID_VNDR 0x09 /* Vendor specific */
+#define PCI_CAP_ID_DBG 0x0A /* Debug port */
+#define PCI_CAP_ID_CCRC 0x0B /* CompactPCI Central Resource Control */
+#define PCI_CAP_ID_HOTPLUG 0x0C /* PCI hot-plug */
+#define PCI_CAP_ID_SSVID 0x0D /* Bridge subsystem vendor/device ID */
+#define PCI_CAP_ID_AGP3 0x0E /* AGP 8x */
+#define PCI_CAP_ID_SECURE 0x0F /* Secure device (?) */
+#define PCI_CAP_ID_EXP 0x10 /* PCI Express */
+#define PCI_CAP_ID_MSIX 0x11 /* MSI-X */
+#define PCI_CAP_ID_SATA 0x12 /* Serial-ATA HBA */
+#define PCI_CAP_ID_AF 0x13 /* Advanced features of PCI devices integrated in PCIe root cplx */
+#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */
+#define PCI_CAP_FLAGS 2 /* Capability defined flags (16 bits) */
+#define PCI_CAP_SIZEOF 4
+
+/* Capabilities residing in the PCI Express extended configuration space */
+
+#define PCI_EXT_CAP_ID_AER 0x01 /* Advanced Error Reporting */
+#define PCI_EXT_CAP_ID_VC 0x02 /* Virtual Channel */
+#define PCI_EXT_CAP_ID_DSN 0x03 /* Device Serial Number */
+#define PCI_EXT_CAP_ID_PB 0x04 /* Power Budgeting */
+#define PCI_EXT_CAP_ID_RCLINK 0x05 /* Root Complex Link Declaration */
+#define PCI_EXT_CAP_ID_RCILINK 0x06 /* Root Complex Internal Link Declaration */
+#define PCI_EXT_CAP_ID_RCECOLL 0x07 /* Root Complex Event Collector */
+#define PCI_EXT_CAP_ID_MFVC 0x08 /* Multi-Function Virtual Channel */
+#define PCI_EXT_CAP_ID_VC2 0x09 /* Virtual Channel (2nd ID) */
+#define PCI_EXT_CAP_ID_RBCB 0x0a /* Root Bridge Control Block */
+#define PCI_EXT_CAP_ID_VNDR 0x0b /* Vendor specific */
+#define PCI_EXT_CAP_ID_ACS 0x0d /* Access Controls */
+#define PCI_EXT_CAP_ID_ARI 0x0e /* Alternative Routing-ID Interpretation */
+#define PCI_EXT_CAP_ID_ATS 0x0f /* Address Translation Service */
+#define PCI_EXT_CAP_ID_SRIOV 0x10 /* Single Root I/O Virtualization */
+#define PCI_EXT_CAP_ID_TPH 0x17 /* Transaction processing hints */
+#define PCI_EXT_CAP_ID_LTR 0x18 /* Latency Tolerance Reporting */
+
+/*** Definitions of capabilities ***/
+
+/* Power Management Registers */
+
+#define PCI_PM_CAP_VER_MASK 0x0007 /* Version (2=PM1.1) */
+#define PCI_PM_CAP_PME_CLOCK 0x0008 /* Clock required for PME generation */
+#define PCI_PM_CAP_DSI 0x0020 /* Device specific initialization required */
+#define PCI_PM_CAP_AUX_C_MASK 0x01c0 /* Maximum aux current required in D3cold */
+#define PCI_PM_CAP_D1 0x0200 /* D1 power state support */
+#define PCI_PM_CAP_D2 0x0400 /* D2 power state support */
+#define PCI_PM_CAP_PME_D0 0x0800 /* PME can be asserted from D0 */
+#define PCI_PM_CAP_PME_D1 0x1000 /* PME can be asserted from D1 */
+#define PCI_PM_CAP_PME_D2 0x2000 /* PME can be asserted from D2 */
+#define PCI_PM_CAP_PME_D3_HOT 0x4000 /* PME can be asserted from D3hot */
+#define PCI_PM_CAP_PME_D3_COLD 0x8000 /* PME can be asserted from D3cold */
+#define PCI_PM_CTRL 4 /* PM control and status register */
+#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */
+#define PCI_PM_CTRL_NO_SOFT_RST 0x0008 /* No Soft Reset from D3hot to D0 */
+#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */
+#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* PM table data index */
+#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* PM table data scaling factor */
+#define PCI_PM_CTRL_PME_STATUS 0x8000 /* PME pin status */
+#define PCI_PM_PPB_EXTENSIONS 6 /* PPB support extensions */
+#define PCI_PM_PPB_B2_B3 0x40 /* If bridge enters D3hot, bus enters: 0=B3, 1=B2 */
+#define PCI_PM_BPCC_ENABLE 0x80 /* Secondary bus is power managed */
+#define PCI_PM_DATA_REGISTER 7 /* PM table contents read here */
+#define PCI_PM_SIZEOF 8
+
+/* AGP registers */
+
+#define PCI_AGP_VERSION 2 /* BCD version number */
+#define PCI_AGP_RFU 3 /* Rest of capability flags */
+#define PCI_AGP_STATUS 4 /* Status register */
+#define PCI_AGP_STATUS_RQ_MASK 0xff000000 /* Maximum number of requests - 1 */
+#define PCI_AGP_STATUS_ISOCH 0x10000 /* Isochronous transactions supported */
+#define PCI_AGP_STATUS_ARQSZ_MASK 0xe000 /* log2(optimum async req size in bytes) - 4 */
+#define PCI_AGP_STATUS_CAL_MASK 0x1c00 /* Calibration cycle timing */
+#define PCI_AGP_STATUS_SBA 0x0200 /* Sideband addressing supported */
+#define PCI_AGP_STATUS_ITA_COH 0x0100 /* In-aperture accesses always coherent */
+#define PCI_AGP_STATUS_GART64 0x0080 /* 64-bit GART entries supported */
+#define PCI_AGP_STATUS_HTRANS 0x0040 /* If 0, core logic can xlate host CPU accesses thru aperture */
+#define PCI_AGP_STATUS_64BIT 0x0020 /* 64-bit addressing cycles supported */
+#define PCI_AGP_STATUS_FW 0x0010 /* Fast write transfers supported */
+#define PCI_AGP_STATUS_AGP3 0x0008 /* AGP3 mode supported */
+#define PCI_AGP_STATUS_RATE4 0x0004 /* 4x transfer rate supported (RFU in AGP3 mode) */
+#define PCI_AGP_STATUS_RATE2 0x0002 /* 2x transfer rate supported (8x in AGP3 mode) */
+#define PCI_AGP_STATUS_RATE1 0x0001 /* 1x transfer rate supported (4x in AGP3 mode) */
+#define PCI_AGP_COMMAND 8 /* Control register */
+#define PCI_AGP_COMMAND_RQ_MASK 0xff000000 /* Master: Maximum number of requests */
+#define PCI_AGP_COMMAND_ARQSZ_MASK 0xe000 /* log2(optimum async req size in bytes) - 4 */
+#define PCI_AGP_COMMAND_CAL_MASK 0x1c00 /* Calibration cycle timing */
+#define PCI_AGP_COMMAND_SBA 0x0200 /* Sideband addressing enabled */
+#define PCI_AGP_COMMAND_AGP 0x0100 /* Allow processing of AGP transactions */
+#define PCI_AGP_COMMAND_GART64 0x0080 /* 64-bit GART entries enabled */
+#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow generation of 64-bit addr cycles */
+#define PCI_AGP_COMMAND_FW 0x0010 /* Enable FW transfers */
+#define PCI_AGP_COMMAND_RATE4 0x0004 /* Use 4x rate (RFU in AGP3 mode) */
+#define PCI_AGP_COMMAND_RATE2 0x0002 /* Use 2x rate (8x in AGP3 mode) */
+#define PCI_AGP_COMMAND_RATE1 0x0001 /* Use 1x rate (4x in AGP3 mode) */
+#define PCI_AGP_SIZEOF 12
+
+/* Vital Product Data */
+
+#define PCI_VPD_ADDR 2 /* Address to access (15 bits!) */
+#define PCI_VPD_ADDR_MASK 0x7fff /* Address mask */
+#define PCI_VPD_ADDR_F 0x8000 /* Write 0, 1 indicates completion */
+#define PCI_VPD_DATA 4 /* 32-bits of data returned here */
+
+/* Slot Identification */
+
+#define PCI_SID_ESR 2 /* Expansion Slot Register */
+#define PCI_SID_ESR_NSLOTS 0x1f /* Number of expansion slots available */
+#define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */
+#define PCI_SID_CHASSIS_NR 3 /* Chassis Number */
+
+/* Message Signaled Interrupts registers */
+
+#define PCI_MSI_FLAGS 2 /* Various flags */
+#define PCI_MSI_FLAGS_MASK_BIT 0x100 /* interrupt masking & reporting supported */
+#define PCI_MSI_FLAGS_64BIT 0x080 /* 64-bit addresses allowed */
+#define PCI_MSI_FLAGS_QSIZE 0x070 /* Message queue size configured */
+#define PCI_MSI_FLAGS_QMASK 0x00e /* Maximum queue size available */
+#define PCI_MSI_FLAGS_ENABLE 0x001 /* MSI feature enabled */
+#define PCI_MSI_RFU 3 /* Rest of capability flags */
+#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */
+#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */
+#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */
+#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */
+#define PCI_MSI_MASK_BIT_32 12 /* per-vector masking for 32-bit devices */
+#define PCI_MSI_MASK_BIT_64 16 /* per-vector masking for 64-bit devices */
+#define PCI_MSI_PENDING_32 16 /* per-vector interrupt pending for 32-bit devices */
+#define PCI_MSI_PENDING_64 20 /* per-vector interrupt pending for 64-bit devices */
+
+/* PCI-X */
+#define PCI_PCIX_COMMAND 2 /* Command register offset */
+#define PCI_PCIX_COMMAND_DPERE 0x0001 /* Data Parity Error Recover Enable */
+#define PCI_PCIX_COMMAND_ERO 0x0002 /* Enable Relaxed Ordering */
+#define PCI_PCIX_COMMAND_MAX_MEM_READ_BYTE_COUNT 0x000c /* Maximum Memory Read Byte Count */
+#define PCI_PCIX_COMMAND_MAX_OUTSTANDING_SPLIT_TRANS 0x0070 /* Maximum Outstanding Split Transactions */
+#define PCI_PCIX_COMMAND_RESERVED 0xf80 /* Reserved bits (NOTE(review): PCI-X spec reserves bits 15:7 = 0xff80 — confirm) */
+#define PCI_PCIX_STATUS 4 /* Status register offset */
+#define PCI_PCIX_STATUS_FUNCTION 0x00000007 /* Function Number */
+#define PCI_PCIX_STATUS_DEVICE 0x000000f8 /* Device Number */
+#define PCI_PCIX_STATUS_BUS 0x0000ff00 /* Bus Number */
+#define PCI_PCIX_STATUS_64BIT 0x00010000 /* 64-bit device */
+#define PCI_PCIX_STATUS_133MHZ 0x00020000 /* 133 MHz capable */
+#define PCI_PCIX_STATUS_SC_DISCARDED 0x00040000 /* Split Completion Discarded */
+#define PCI_PCIX_STATUS_UNEXPECTED_SC 0x00080000 /* Unexpected Split Completion */
+#define PCI_PCIX_STATUS_DEVICE_COMPLEXITY 0x00100000 /* 0 = simple device, 1 = bridge device */
+#define PCI_PCIX_STATUS_DESIGNED_MAX_MEM_READ_BYTE_COUNT 0x00600000 /* 0 = 512 bytes, 1 = 1024, 2 = 2048, 3 = 4096 */
+#define PCI_PCIX_STATUS_DESIGNED_MAX_OUTSTANDING_SPLIT_TRANS 0x03800000 /* Designed Maximum Outstanding Split Transactions */
+#define PCI_PCIX_STATUS_DESIGNED_MAX_CUMULATIVE_READ_SIZE 0x1c000000 /* Designed Maximum Cumulative Read Size */
+#define PCI_PCIX_STATUS_RCVD_SC_ERR_MESS 0x20000000 /* Received Split Completion Error Message */
+#define PCI_PCIX_STATUS_266MHZ 0x40000000 /* 266 MHz capable */
+#define PCI_PCIX_STATUS_533MHZ 0x80000000 /* 533 MHz capable */
+#define PCI_PCIX_SIZEOF 4
+
+/* PCI-X Bridges */
+#define PCI_PCIX_BRIDGE_SEC_STATUS 2 /* Secondary bus status register offset */
+#define PCI_PCIX_BRIDGE_SEC_STATUS_64BIT 0x0001 /* 64-bit device on secondary bus */
+#define PCI_PCIX_BRIDGE_SEC_STATUS_133MHZ 0x0002 /* 133 MHz capable on secondary bus */
+#define PCI_PCIX_BRIDGE_SEC_STATUS_SC_DISCARDED 0x0004 /* Split Completion Discarded on secondary bus */
+#define PCI_PCIX_BRIDGE_SEC_STATUS_UNEXPECTED_SC 0x0008 /* Unexpected Split Completion on secondary bus */
+#define PCI_PCIX_BRIDGE_SEC_STATUS_SC_OVERRUN 0x0010 /* Split Completion Overrun on secondary bus */
+#define PCI_PCIX_BRIDGE_SEC_STATUS_SPLIT_REQUEST_DELAYED 0x0020 /* Split Request Delayed on secondary bus */
+#define PCI_PCIX_BRIDGE_SEC_STATUS_CLOCK_FREQ 0x01c0 /* Secondary bus clock frequency */
+#define PCI_PCIX_BRIDGE_SEC_STATUS_RESERVED 0xfe00 /* Reserved bits */
+#define PCI_PCIX_BRIDGE_STATUS 4 /* Primary bus status register offset */
+#define PCI_PCIX_BRIDGE_STATUS_FUNCTION 0x00000007 /* Function Number */
+#define PCI_PCIX_BRIDGE_STATUS_DEVICE 0x000000f8 /* Device Number */
+#define PCI_PCIX_BRIDGE_STATUS_BUS 0x0000ff00 /* Bus Number */
+#define PCI_PCIX_BRIDGE_STATUS_64BIT 0x00010000 /* 64-bit device */
+#define PCI_PCIX_BRIDGE_STATUS_133MHZ 0x00020000 /* 133 MHz capable */
+#define PCI_PCIX_BRIDGE_STATUS_SC_DISCARDED 0x00040000 /* Split Completion Discarded */
+#define PCI_PCIX_BRIDGE_STATUS_UNEXPECTED_SC 0x00080000 /* Unexpected Split Completion */
+#define PCI_PCIX_BRIDGE_STATUS_SC_OVERRUN 0x00100000 /* Split Completion Overrun */
+#define PCI_PCIX_BRIDGE_STATUS_SPLIT_REQUEST_DELAYED 0x00200000 /* Split Request Delayed */
+#define PCI_PCIX_BRIDGE_STATUS_RESERVED 0xffc00000 /* Reserved bits */
+#define PCI_PCIX_BRIDGE_UPSTREAM_SPLIT_TRANS_CTRL 8 /* Upstream Split Transaction Register offset */
+#define PCI_PCIX_BRIDGE_DOWNSTREAM_SPLIT_TRANS_CTRL 12 /* Downstream Split Transaction Register offset */
+#define PCI_PCIX_BRIDGE_STR_CAPACITY 0x0000ffff /* Split Transaction Capacity */
+#define PCI_PCIX_BRIDGE_STR_COMMITMENT_LIMIT 0xffff0000 /* Split Transaction Commitment Limit */
+#define PCI_PCIX_BRIDGE_SIZEOF 12
+
+/* HyperTransport (as of spec rev. 2.00) */
+#define PCI_HT_CMD 2 /* Command Register */
+#define PCI_HT_CMD_TYP_HI 0xe000 /* Capability Type high part */
+#define PCI_HT_CMD_TYP_HI_PRI 0x0000 /* Slave or Primary Interface */
+#define PCI_HT_CMD_TYP_HI_SEC 0x2000 /* Host or Secondary Interface */
+#define PCI_HT_CMD_TYP 0xf800 /* Capability Type */
+#define PCI_HT_CMD_TYP_SW 0x4000 /* Switch */
+#define PCI_HT_CMD_TYP_IDC 0x8000 /* Interrupt Discovery and Configuration */
+#define PCI_HT_CMD_TYP_RID 0x8800 /* Revision ID */
+#define PCI_HT_CMD_TYP_UIDC 0x9000 /* UnitID Clumping */
+#define PCI_HT_CMD_TYP_ECSA 0x9800 /* Extended Configuration Space Access */
+#define PCI_HT_CMD_TYP_AM 0xa000 /* Address Mapping */
+#define PCI_HT_CMD_TYP_MSIM 0xa800 /* MSI Mapping */
+#define PCI_HT_CMD_TYP_DR 0xb000 /* DirectRoute */
+#define PCI_HT_CMD_TYP_VCS 0xb800 /* VCSet */
+#define PCI_HT_CMD_TYP_RM 0xc000 /* Retry Mode */
+#define PCI_HT_CMD_TYP_X86 0xc800 /* X86 (reserved) */
+
+ /* Link Control Register */
+#define PCI_HT_LCTR_CFLE 0x0002 /* CRC Flood Enable */
+#define PCI_HT_LCTR_CST 0x0004 /* CRC Start Test */
+#define PCI_HT_LCTR_CFE 0x0008 /* CRC Force Error */
+#define PCI_HT_LCTR_LKFAIL 0x0010 /* Link Failure */
+#define PCI_HT_LCTR_INIT 0x0020 /* Initialization Complete */
+#define PCI_HT_LCTR_EOC 0x0040 /* End of Chain */
+#define PCI_HT_LCTR_TXO 0x0080 /* Transmitter Off */
+#define PCI_HT_LCTR_CRCERR 0x0f00 /* CRC Error */
+#define PCI_HT_LCTR_ISOCEN 0x1000 /* Isochronous Flow Control Enable */
+#define PCI_HT_LCTR_LSEN 0x2000 /* LDTSTOP# Tristate Enable */
+#define PCI_HT_LCTR_EXTCTL 0x4000 /* Extended CTL Time */
+#define PCI_HT_LCTR_64B 0x8000 /* 64-bit Addressing Enable */
+
+ /* Link Configuration Register */
+#define PCI_HT_LCNF_MLWI 0x0007 /* Max Link Width In */
+#define PCI_HT_LCNF_LW_8B 0x0 /* Link Width 8 bits */
+#define PCI_HT_LCNF_LW_16B 0x1 /* Link Width 16 bits */
+#define PCI_HT_LCNF_LW_32B 0x3 /* Link Width 32 bits */
+#define PCI_HT_LCNF_LW_2B 0x4 /* Link Width 2 bits */
+#define PCI_HT_LCNF_LW_4B 0x5 /* Link Width 4 bits */
+#define PCI_HT_LCNF_LW_NC 0x7 /* Link physically not connected */
+#define PCI_HT_LCNF_DFI 0x0008 /* Doubleword Flow Control In */
+#define PCI_HT_LCNF_MLWO 0x0070 /* Max Link Width Out */
+#define PCI_HT_LCNF_DFO 0x0080 /* Doubleword Flow Control Out */
+#define PCI_HT_LCNF_LWI 0x0700 /* Link Width In */
+#define PCI_HT_LCNF_DFIE 0x0800 /* Doubleword Flow Control In Enable */
+#define PCI_HT_LCNF_LWO 0x7000 /* Link Width Out */
+#define PCI_HT_LCNF_DFOE 0x8000 /* Doubleword Flow Control Out Enable */
+
+ /* Revision ID Register */
+#define PCI_HT_RID_MIN 0x1f /* Minor Revision */
+#define PCI_HT_RID_MAJ 0xe0 /* Major Revision */
+
+ /* Link Frequency/Error Register */
+#define PCI_HT_LFRER_FREQ 0x0f /* Transmitter Clock Frequency */
+#define PCI_HT_LFRER_200 0x00 /* 200MHz */
+#define PCI_HT_LFRER_300 0x01 /* 300MHz */
+#define PCI_HT_LFRER_400 0x02 /* 400MHz */
+#define PCI_HT_LFRER_500 0x03 /* 500MHz */
+#define PCI_HT_LFRER_600 0x04 /* 600MHz */
+#define PCI_HT_LFRER_800 0x05 /* 800MHz */
+#define PCI_HT_LFRER_1000 0x06 /* 1.0GHz */
+#define PCI_HT_LFRER_1200 0x07 /* 1.2GHz */
+#define PCI_HT_LFRER_1400 0x08 /* 1.4GHz */
+#define PCI_HT_LFRER_1600 0x09 /* 1.6GHz */
+#define PCI_HT_LFRER_VEND 0x0f /* Vendor-Specific */
+#define PCI_HT_LFRER_ERR 0xf0 /* Link Error */
+#define PCI_HT_LFRER_PROT 0x10 /* Protocol Error */
+#define PCI_HT_LFRER_OV 0x20 /* Overflow Error */
+#define PCI_HT_LFRER_EOC 0x40 /* End of Chain Error */
+#define PCI_HT_LFRER_CTLT 0x80 /* CTL Timeout */
+
+ /* Link Frequency Capability Register */
+#define PCI_HT_LFCAP_200 0x0001 /* 200MHz */
+#define PCI_HT_LFCAP_300 0x0002 /* 300MHz */
+#define PCI_HT_LFCAP_400 0x0004 /* 400MHz */
+#define PCI_HT_LFCAP_500 0x0008 /* 500MHz */
+#define PCI_HT_LFCAP_600 0x0010 /* 600MHz */
+#define PCI_HT_LFCAP_800 0x0020 /* 800MHz */
+#define PCI_HT_LFCAP_1000 0x0040 /* 1.0GHz */
+#define PCI_HT_LFCAP_1200 0x0080 /* 1.2GHz */
+#define PCI_HT_LFCAP_1400 0x0100 /* 1.4GHz */
+#define PCI_HT_LFCAP_1600 0x0200 /* 1.6GHz */
+#define PCI_HT_LFCAP_VEND 0x8000 /* Vendor-Specific */
+
+ /* Feature Register */
+#define PCI_HT_FTR_ISOCFC 0x0001 /* Isochronous Flow Control Mode */
+#define PCI_HT_FTR_LDTSTOP 0x0002 /* LDTSTOP# Supported */
+#define PCI_HT_FTR_CRCTM 0x0004 /* CRC Test Mode */
+#define PCI_HT_FTR_ECTLT 0x0008 /* Extended CTL Time Required */
+#define PCI_HT_FTR_64BA 0x0010 /* 64-bit Addressing */
+#define PCI_HT_FTR_UIDRD 0x0020 /* UnitID Reorder Disable */
+
+ /* Error Handling Register */
+#define PCI_HT_EH_PFLE 0x0001 /* Protocol Error Flood Enable */
+#define PCI_HT_EH_OFLE 0x0002 /* Overflow Error Flood Enable */
+#define PCI_HT_EH_PFE 0x0004 /* Protocol Error Fatal Enable */
+#define PCI_HT_EH_OFE 0x0008 /* Overflow Error Fatal Enable */
+#define PCI_HT_EH_EOCFE 0x0010 /* End of Chain Error Fatal Enable */
+#define PCI_HT_EH_RFE 0x0020 /* Response Error Fatal Enable */
+#define PCI_HT_EH_CRCFE 0x0040 /* CRC Error Fatal Enable */
+#define PCI_HT_EH_SERRFE 0x0080 /* System Error Fatal Enable */
+#define PCI_HT_EH_CF 0x0100 /* Chain Fail */
+#define PCI_HT_EH_RE 0x0200 /* Response Error */
+#define PCI_HT_EH_PNFE 0x0400 /* Protocol Error Nonfatal Enable */
+#define PCI_HT_EH_ONFE 0x0800 /* Overflow Error Nonfatal Enable */
+#define PCI_HT_EH_EOCNFE 0x1000 /* End of Chain Error Nonfatal Enable */
+#define PCI_HT_EH_RNFE 0x2000 /* Response Error Nonfatal Enable */
+#define PCI_HT_EH_CRCNFE 0x4000 /* CRC Error Nonfatal Enable */
+#define PCI_HT_EH_SERRNFE 0x8000 /* System Error Nonfatal Enable */
+
+/* HyperTransport: Slave or Primary Interface */
+#define PCI_HT_PRI_CMD 2 /* Command Register */
+#define PCI_HT_PRI_CMD_BUID 0x001f /* Base UnitID */
+#define PCI_HT_PRI_CMD_UC 0x03e0 /* Unit Count */
+#define PCI_HT_PRI_CMD_MH 0x0400 /* Master Host */
+#define PCI_HT_PRI_CMD_DD 0x0800 /* Default Direction */
+#define PCI_HT_PRI_CMD_DUL 0x1000 /* Drop on Uninitialized Link */
+
+#define PCI_HT_PRI_LCTR0 4 /* Link Control 0 Register */
+#define PCI_HT_PRI_LCNF0 6 /* Link Config 0 Register */
+#define PCI_HT_PRI_LCTR1 8 /* Link Control 1 Register */
+#define PCI_HT_PRI_LCNF1 10 /* Link Config 1 Register */
+#define PCI_HT_PRI_RID 12 /* Revision ID Register */
+#define PCI_HT_PRI_LFRER0 13 /* Link Frequency/Error 0 Register */
+#define PCI_HT_PRI_LFCAP0 14 /* Link Frequency Capability 0 Register */
+#define PCI_HT_PRI_FTR 16 /* Feature Register */
+#define PCI_HT_PRI_LFRER1 17 /* Link Frequency/Error 1 Register */
+#define PCI_HT_PRI_LFCAP1 18 /* Link Frequency Capability 1 Register */
+#define PCI_HT_PRI_ES 20 /* Enumeration Scratchpad Register */
+#define PCI_HT_PRI_EH 22 /* Error Handling Register */
+#define PCI_HT_PRI_MBU 24 /* Memory Base Upper Register */
+#define PCI_HT_PRI_MLU 25 /* Memory Limit Upper Register */
+#define PCI_HT_PRI_BN 26 /* Bus Number Register */
+#define PCI_HT_PRI_SIZEOF 28 /* Size of the Slave/Primary capability block */
+
+/* HyperTransport: Host or Secondary Interface */
+#define PCI_HT_SEC_CMD 2 /* Command Register */
+#define PCI_HT_SEC_CMD_WR 0x0001 /* Warm Reset */
+#define PCI_HT_SEC_CMD_DE 0x0002 /* Double-Ended */
+#define PCI_HT_SEC_CMD_DN 0x007c /* Device Number (bits 6:2; 0x0076 was wrong — it overlapped DE) */
+#define PCI_HT_SEC_CMD_CS 0x0080 /* Chain Side */
+#define PCI_HT_SEC_CMD_HH 0x0100 /* Host Hide */
+#define PCI_HT_SEC_CMD_AS 0x0400 /* Act as Slave */
+#define PCI_HT_SEC_CMD_HIECE 0x0800 /* Host Inbound End of Chain Error */
+#define PCI_HT_SEC_CMD_DUL 0x1000 /* Drop on Uninitialized Link */
+
+#define PCI_HT_SEC_LCTR 4 /* Link Control Register */
+#define PCI_HT_SEC_LCNF 6 /* Link Config Register */
+#define PCI_HT_SEC_RID 8 /* Revision ID Register */
+#define PCI_HT_SEC_LFRER 9 /* Link Frequency/Error Register */
+#define PCI_HT_SEC_LFCAP 10 /* Link Frequency Capability Register */
+#define PCI_HT_SEC_FTR 12 /* Feature Register */
+#define PCI_HT_SEC_FTR_EXTRS 0x0100 /* Extended Register Set */
+#define PCI_HT_SEC_FTR_UCNFE 0x0200 /* Upstream Configuration Enable */
+#define PCI_HT_SEC_ES 16 /* Enumeration Scratchpad Register */
+#define PCI_HT_SEC_EH 18 /* Error Handling Register */
+#define PCI_HT_SEC_MBU 20 /* Memory Base Upper Register */
+#define PCI_HT_SEC_MLU 21 /* Memory Limit Upper Register */
+#define PCI_HT_SEC_SIZEOF 24 /* Size of the Host/Secondary capability block */
+
+/* HyperTransport: Switch */
+#define PCI_HT_SW_CMD 2 /* Switch Command Register */
+#define PCI_HT_SW_CMD_VIBERR 0x0080 /* VIB Error */
+#define PCI_HT_SW_CMD_VIBFL 0x0100 /* VIB Flood */
+#define PCI_HT_SW_CMD_VIBFT 0x0200 /* VIB Fatal */
+#define PCI_HT_SW_CMD_VIBNFT 0x0400 /* VIB Nonfatal */
+#define PCI_HT_SW_PMASK 4 /* Partition Mask Register */
+#define PCI_HT_SW_SWINF 8 /* Switch Info Register */
+#define PCI_HT_SW_SWINF_DP 0x0000001f /* Default Port */
+#define PCI_HT_SW_SWINF_EN 0x00000020 /* Enable Decode */
+#define PCI_HT_SW_SWINF_CR 0x00000040 /* Cold Reset */
+#define PCI_HT_SW_SWINF_PCIDX 0x00000f00 /* Performance Counter Index */
+#define PCI_HT_SW_SWINF_BLRIDX 0x0003f000 /* Base/Limit Range Index */
+#define PCI_HT_SW_SWINF_SBIDX 0x00002000 /* Secondary Base Range Index (NOTE(review): mask overlaps BLRIDX — confirm against HT spec) */
+#define PCI_HT_SW_SWINF_HP 0x00040000 /* Hot Plug */
+#define PCI_HT_SW_SWINF_HIDE 0x00080000 /* Hide Port */
+#define PCI_HT_SW_PCD 12 /* Performance Counter Data Register */
+#define PCI_HT_SW_BLRD 16 /* Base/Limit Range Data Register */
+#define PCI_HT_SW_SBD 20 /* Secondary Base Data Register */
+#define PCI_HT_SW_SIZEOF 24
+
+ /* Counter indices */
+#define PCI_HT_SW_PC_PCR 0x0 /* Posted Command Receive */
+#define PCI_HT_SW_PC_NPCR 0x1 /* Nonposted Command Receive */
+#define PCI_HT_SW_PC_RCR 0x2 /* Response Command Receive */
+#define PCI_HT_SW_PC_PDWR 0x3 /* Posted DW Receive */
+#define PCI_HT_SW_PC_NPDWR 0x4 /* Nonposted DW Receive */
+#define PCI_HT_SW_PC_RDWR 0x5 /* Response DW Receive */
+#define PCI_HT_SW_PC_PCT 0x6 /* Posted Command Transmit */
+#define PCI_HT_SW_PC_NPCT 0x7 /* Nonposted Command Transmit */
+#define PCI_HT_SW_PC_RCT 0x8 /* Response Command Transmit */
+#define PCI_HT_SW_PC_PDWT 0x9 /* Posted DW Transmit */
+#define PCI_HT_SW_PC_NPDWT 0xa /* Nonposted DW Transmit */
+#define PCI_HT_SW_PC_RDWT 0xb /* Response DW Transmit */
+
+ /* Base/Limit Range indices */
+#define PCI_HT_SW_BLR_BASE0_LO 0x0 /* Base 0[31:1], Enable */
+#define PCI_HT_SW_BLR_BASE0_HI 0x1 /* Base 0 Upper */
+#define PCI_HT_SW_BLR_LIM0_LO 0x2 /* Limit 0 Lower */
+#define PCI_HT_SW_BLR_LIM0_HI 0x3 /* Limit 0 Upper */
+
+ /* Secondary Base indices */
+#define PCI_HT_SW_SB_LO 0x0 /* Secondary Base[31:1], Enable */
+#define PCI_HT_SW_S0_HI 0x1 /* Secondary Base Upper (NOTE(review): name S0_HI is inconsistent with SB_LO — likely meant SB_HI) */
+
+/* HyperTransport: Interrupt Discovery and Configuration */
+#define PCI_HT_IDC_IDX 2 /* Index Register */
+#define PCI_HT_IDC_DATA 4 /* Data Register */
+#define PCI_HT_IDC_SIZEOF 8
+
+ /* Register indices */
+#define PCI_HT_IDC_IDX_LINT 0x01 /* Last Interrupt Register */
+#define PCI_HT_IDC_LINT 0x00ff0000 /* Last interrupt definition */
+#define PCI_HT_IDC_IDX_IDR 0x10 /* Interrupt Definition Registers */
+ /* Low part (at index) */
+#define PCI_HT_IDC_IDR_MASK 0x10000001 /* Mask */
+#define PCI_HT_IDC_IDR_POL 0x10000002 /* Polarity */
+#define PCI_HT_IDC_IDR_II_2 0x1000001c /* IntrInfo[4:2]: Message Type */
+#define PCI_HT_IDC_IDR_II_5 0x10000020 /* IntrInfo[5]: Request EOI */
+#define PCI_HT_IDC_IDR_II_6 0x00ffffc0 /* IntrInfo[23:6] */
+#define PCI_HT_IDC_IDR_II_24 0xff000000 /* IntrInfo[31:24] */
+ /* High part (at index + 1) */
+#define PCI_HT_IDC_IDR_II_32 0x00ffffff /* IntrInfo[55:32] */
+#define PCI_HT_IDC_IDR_PASSPW 0x40000000 /* PassPW setting for messages */
+#define PCI_HT_IDC_IDR_WEOI 0x80000000 /* Waiting for EOI */
+
+/* HyperTransport: Revision ID */
+#define PCI_HT_RID_RID 2 /* Revision Register */
+#define PCI_HT_RID_SIZEOF 4 /* Size of the Revision ID capability block */
+
+/* HyperTransport: UnitID Clumping */
+#define PCI_HT_UIDC_CS 4 /* Clumping Support Register */
+#define PCI_HT_UIDC_CE 8 /* Clumping Enable Register */
+#define PCI_HT_UIDC_SIZEOF 12 /* Size of the UnitID Clumping capability block */
+
+/* HyperTransport: Extended Configuration Space Access */
+#define PCI_HT_ECSA_ADDR 4 /* Configuration Address Register */
+#define PCI_HT_ECSA_ADDR_REG 0x00000ffc /* Register */
+#define PCI_HT_ECSA_ADDR_FUN 0x00007000 /* Function */
+#define PCI_HT_ECSA_ADDR_DEV 0x000f8000 /* Device (bits 19:15; 0x000f1000 was wrong — it overlapped FUN) */
+#define PCI_HT_ECSA_ADDR_BUS 0x0ff00000 /* Bus Number */
+#define PCI_HT_ECSA_ADDR_TYPE 0x10000000 /* Access Type */
+#define PCI_HT_ECSA_DATA 8 /* Configuration Data Register */
+#define PCI_HT_ECSA_SIZEOF 12 /* Size of the ECSA capability block */
+
+/* HyperTransport: Address Mapping */
+#define PCI_HT_AM_CMD 2 /* Command Register */
+#define PCI_HT_AM_CMD_NDMA 0x000f /* Number of DMA Mappings */
+#define PCI_HT_AM_CMD_IOSIZ 0x01f0 /* I/O Size */
+#define PCI_HT_AM_CMD_MT 0x0600 /* Map Type */
+#define PCI_HT_AM_CMD_MT_40B 0x0000 /* 40-bit */
+#define PCI_HT_AM_CMD_MT_64B 0x0200 /* 64-bit */
+
+ /* Window Control Register bits */
+#define PCI_HT_AM_SBW_CTR_COMP 0x1 /* Compat */
+#define PCI_HT_AM_SBW_CTR_NCOH 0x2 /* NonCoherent */
+#define PCI_HT_AM_SBW_CTR_ISOC 0x4 /* Isochronous */
+#define PCI_HT_AM_SBW_CTR_EN 0x8 /* Enable */
+
+/* HyperTransport: 40-bit Address Mapping */
+#define PCI_HT_AM40_SBNPW 4 /* Secondary Bus Non-Prefetchable Window Register */
+#define PCI_HT_AM40_SBW_BASE 0x000fffff /* Window Base */
+#define PCI_HT_AM40_SBW_CTR 0xf0000000 /* Window Control */
+#define PCI_HT_AM40_SBPW 8 /* Secondary Bus Prefetchable Window Register */
+#define PCI_HT_AM40_DMA_PBASE0 12 /* DMA Window Primary Base 0 Register */
+#define PCI_HT_AM40_DMA_CTR0 15 /* DMA Window Control 0 Register */
+#define PCI_HT_AM40_DMA_CTR_CTR 0xf0 /* Window Control */
+#define PCI_HT_AM40_DMA_SLIM0 16 /* DMA Window Secondary Limit 0 Register */
+#define PCI_HT_AM40_DMA_SBASE0 18 /* DMA Window Secondary Base 0 Register */
+#define PCI_HT_AM40_SIZEOF 12 /* size is variable: 12 + 8 * NDMA */
+
+/* HyperTransport: 64-bit Address Mapping */
+#define PCI_HT_AM64_IDX 4 /* Index Register */
+#define PCI_HT_AM64_DATA_LO 8 /* Data Lower Register */
+#define PCI_HT_AM64_DATA_HI 12 /* Data Upper Register */
+#define PCI_HT_AM64_SIZEOF 16 /* Size of the 64-bit Address Mapping capability block */
+
+ /* Register indices */
+#define PCI_HT_AM64_IDX_SBNPW 0x00 /* Secondary Bus Non-Prefetchable Window Register */
+#define PCI_HT_AM64_W_BASE_LO 0xfff00000 /* Window Base Lower */
+#define PCI_HT_AM64_W_CTR 0x0000000f /* Window Control */
+#define PCI_HT_AM64_IDX_SBPW 0x01 /* Secondary Bus Prefetchable Window Register */
+#define PCI_HT_AM64_IDX_PBNPW 0x02 /* Primary Bus Non-Prefetchable Window Register */
+#define PCI_HT_AM64_IDX_DMAPB0 0x04 /* DMA Window Primary Base 0 Register */
+#define PCI_HT_AM64_IDX_DMASB0 0x05 /* DMA Window Secondary Base 0 Register */
+#define PCI_HT_AM64_IDX_DMASL0 0x06 /* DMA Window Secondary Limit 0 Register */
+
+/* HyperTransport: MSI Mapping */
+#define PCI_HT_MSIM_CMD 2 /* Command Register */
+#define PCI_HT_MSIM_CMD_EN 0x0001 /* Mapping Active */
+#define PCI_HT_MSIM_CMD_FIXD 0x0002 /* MSI Mapping Address Fixed */
+#define PCI_HT_MSIM_ADDR_LO 4 /* MSI Mapping Address Lower Register */
+#define PCI_HT_MSIM_ADDR_HI 8 /* MSI Mapping Address Upper Register */
+#define PCI_HT_MSIM_SIZEOF 12 /* Size of the MSI Mapping capability block */
+
+/* HyperTransport: DirectRoute */
+#define PCI_HT_DR_CMD 2 /* Command Register */
+#define PCI_HT_DR_CMD_NDRS 0x000f /* Number of DirectRoute Spaces */
+#define PCI_HT_DR_CMD_IDX 0x01f0 /* Index */
+#define PCI_HT_DR_EN 4 /* Enable Vector Register */
+#define PCI_HT_DR_DATA 8 /* Data Register */
+#define PCI_HT_DR_SIZEOF 12 /* Size of the DirectRoute capability block */
+
+ /* Register indices */
+#define PCI_HT_DR_IDX_BASE_LO 0x00 /* DirectRoute Base Lower Register */
+#define PCI_HT_DR_OTNRD 0x00000001 /* Opposite to Normal Request Direction */
+#define PCI_HT_DR_BL_LO 0xffffff00 /* Base/Limit Lower */
+#define PCI_HT_DR_IDX_BASE_HI 0x01 /* DirectRoute Base Upper Register */
+#define PCI_HT_DR_IDX_LIMIT_LO 0x02 /* DirectRoute Limit Lower Register */
+#define PCI_HT_DR_IDX_LIMIT_HI 0x03 /* DirectRoute Limit Upper Register */
+
+/* HyperTransport: VCSet */
+#define PCI_HT_VCS_SUP 4 /* VCSets Supported Register */
+#define PCI_HT_VCS_L1EN 5 /* Link 1 VCSets Enabled Register */
+#define PCI_HT_VCS_L0EN 6 /* Link 0 VCSets Enabled Register */
+#define PCI_HT_VCS_SBD 8 /* Stream Bucket Depth Register */
+#define PCI_HT_VCS_SINT 9 /* Stream Interval Register */
+#define PCI_HT_VCS_SSUP 10 /* Number of Streaming VCs Supported Register */
+#define PCI_HT_VCS_SSUP_0 0x00 /* Streaming VC 0 */
+#define PCI_HT_VCS_SSUP_3 0x01 /* Streaming VCs 0-3 */
+#define PCI_HT_VCS_SSUP_15 0x02 /* Streaming VCs 0-15 */
+#define PCI_HT_VCS_NFCBD 12 /* Non-FC Bucket Depth Register */
+#define PCI_HT_VCS_NFCINT 13 /* Non-FC Bucket Interval Register */
+#define PCI_HT_VCS_SIZEOF 16 /* Size of the VCSet capability block */
+
+/* HyperTransport: Retry Mode */
+#define PCI_HT_RM_CTR0 4 /* Control 0 Register */
+#define PCI_HT_RM_CTR_LRETEN 0x01 /* Link Retry Enable */
+#define PCI_HT_RM_CTR_FSER 0x02 /* Force Single Error */
+#define PCI_HT_RM_CTR_ROLNEN 0x04 /* Rollover Nonfatal Enable */
+#define PCI_HT_RM_CTR_FSS 0x08 /* Force Single Stomp */
+#define PCI_HT_RM_CTR_RETNEN 0x10 /* Retry Nonfatal Enable */
+#define PCI_HT_RM_CTR_RETFEN 0x20 /* Retry Fatal Enable */
+#define PCI_HT_RM_CTR_AA 0xc0 /* Allowed Attempts */
+#define PCI_HT_RM_STS0 5 /* Status 0 Register */
+#define PCI_HT_RM_STS_RETSNT 0x01 /* Retry Sent */
+#define PCI_HT_RM_STS_CNTROL 0x02 /* Count Rollover */
+#define PCI_HT_RM_STS_SRCV 0x04 /* Stomp Received */
+#define PCI_HT_RM_CTR1 6 /* Control 1 Register */
+#define PCI_HT_RM_STS1 7 /* Status 1 Register */
+#define PCI_HT_RM_CNT0 8 /* Retry Count 0 Register */
+#define PCI_HT_RM_CNT1 10 /* Retry Count 1 Register */
+#define PCI_HT_RM_SIZEOF 12 /* Size of the Retry Mode capability block */
+
+/* Vendor-Specific Capability (see PCI_EVNDR_xxx for the PCIe version) */
+#define PCI_VNDR_LENGTH 2 /* Length byte */
+
+/* PCI Express */
+#define PCI_EXP_FLAGS 0x2 /* Capabilities register */
+#define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */
+#define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */
+#define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */
+#define PCI_EXP_TYPE_LEG_END 0x1 /* Legacy Endpoint */
+#define PCI_EXP_TYPE_ROOT_PORT 0x4 /* Root Port */
+#define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */
+#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */
+#define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */
+#define PCI_EXP_TYPE_PCIE_BRIDGE 0x8 /* PCI/PCI-X to PCIE Bridge */
+#define PCI_EXP_TYPE_ROOT_INT_EP 0x9 /* Root Complex Integrated Endpoint */
+#define PCI_EXP_TYPE_ROOT_EC 0xa /* Root Complex Event Collector */
+#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */
+#define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */
+#define PCI_EXP_DEVCAP 0x4 /* Device capabilities */
+#define PCI_EXP_DEVCAP_PAYLOAD 0x07 /* Max_Payload_Size */
+#define PCI_EXP_DEVCAP_PHANTOM 0x18 /* Phantom functions */
+#define PCI_EXP_DEVCAP_EXT_TAG 0x20 /* Extended tags */
+#define PCI_EXP_DEVCAP_L0S 0x1c0 /* L0s Acceptable Latency */
+#define PCI_EXP_DEVCAP_L1 0xe00 /* L1 Acceptable Latency */
+#define PCI_EXP_DEVCAP_ATN_BUT 0x1000 /* Attention Button Present */
+#define PCI_EXP_DEVCAP_ATN_IND 0x2000 /* Attention Indicator Present */
+#define PCI_EXP_DEVCAP_PWR_IND 0x4000 /* Power Indicator Present */
+#define PCI_EXP_DEVCAP_RBE 0x8000 /* Role-Based Error Reporting */
+#define PCI_EXP_DEVCAP_PWR_VAL 0x3fc0000 /* Slot Power Limit Value */
+#define PCI_EXP_DEVCAP_PWR_SCL 0xc000000 /* Slot Power Limit Scale */
+#define PCI_EXP_DEVCAP_FLRESET 0x10000000 /* Function-Level Reset */
+#define PCI_EXP_DEVCTL 0x8 /* Device Control */
+#define PCI_EXP_DEVCTL_CERE 0x0001 /* Correctable Error Reporting En. */
+#define PCI_EXP_DEVCTL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */
+#define PCI_EXP_DEVCTL_FERE 0x0004 /* Fatal Error Reporting Enable */
+#define PCI_EXP_DEVCTL_URRE 0x0008 /* Unsupported Request Reporting En. */
+#define PCI_EXP_DEVCTL_RELAXED 0x0010 /* Enable Relaxed Ordering */
+#define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */
+#define PCI_EXP_DEVCTL_EXT_TAG 0x0100 /* Extended Tag Field Enable */
+#define PCI_EXP_DEVCTL_PHANTOM 0x0200 /* Phantom Functions Enable */
+#define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */
+#define PCI_EXP_DEVCTL_NOSNOOP 0x0800 /* Enable No Snoop */
+#define PCI_EXP_DEVCTL_READRQ 0x7000 /* Max_Read_Request_Size */
+#define PCI_EXP_DEVCTL_BCRE 0x8000 /* Bridge Configuration Retry Enable */
+#define PCI_EXP_DEVCTL_FLRESET 0x8000 /* Function-Level Reset [bit shared with BCRE] */
+#define PCI_EXP_DEVSTA 0xa /* Device Status */
+#define PCI_EXP_DEVSTA_CED 0x01 /* Correctable Error Detected */
+#define PCI_EXP_DEVSTA_NFED 0x02 /* Non-Fatal Error Detected */
+#define PCI_EXP_DEVSTA_FED 0x04 /* Fatal Error Detected */
+#define PCI_EXP_DEVSTA_URD 0x08 /* Unsupported Request Detected */
+#define PCI_EXP_DEVSTA_AUXPD 0x10 /* AUX Power Detected */
+#define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */
+#define PCI_EXP_LNKCAP 0xc /* Link Capabilities */
+#define PCI_EXP_LNKCAP_SPEED 0x0000f /* Maximum Link Speed */
+#define PCI_EXP_LNKCAP_WIDTH 0x003f0 /* Maximum Link Width */
+#define PCI_EXP_LNKCAP_ASPM 0x00c00 /* Active State Power Management */
+#define PCI_EXP_LNKCAP_L0S 0x07000 /* L0s Acceptable Latency */
+#define PCI_EXP_LNKCAP_L1 0x38000 /* L1 Acceptable Latency */
+#define PCI_EXP_LNKCAP_CLOCKPM 0x40000 /* Clock Power Management */
+#define PCI_EXP_LNKCAP_SURPRISE 0x80000 /* Surprise Down Error Reporting */
+#define PCI_EXP_LNKCAP_DLLA 0x100000 /* Data Link Layer Active Reporting */
+#define PCI_EXP_LNKCAP_LBNC 0x200000 /* Link Bandwidth Notification Capability */
+#define PCI_EXP_LNKCAP_PORT 0xff000000 /* Port Number */
+#define PCI_EXP_LNKCTL 0x10 /* Link Control */
+#define PCI_EXP_LNKCTL_ASPM 0x0003 /* ASPM Control */
+#define PCI_EXP_LNKCTL_RCB 0x0008 /* Read Completion Boundary */
+#define PCI_EXP_LNKCTL_DISABLE 0x0010 /* Link Disable */
+#define PCI_EXP_LNKCTL_RETRAIN 0x0020 /* Retrain Link */
+#define PCI_EXP_LNKCTL_CLOCK 0x0040 /* Common Clock Configuration */
+#define PCI_EXP_LNKCTL_XSYNCH 0x0080 /* Extended Synch */
+#define PCI_EXP_LNKCTL_CLOCKPM 0x0100 /* Clock Power Management */
+#define PCI_EXP_LNKCTL_HWAUTWD 0x0200 /* Hardware Autonomous Width Disable */
+#define PCI_EXP_LNKCTL_BWMIE 0x0400 /* Bandwidth Mgmt Interrupt Enable */
+#define PCI_EXP_LNKCTL_AUTBWIE 0x0800 /* Autonomous Bandwidth Mgmt Interrupt Enable */
+#define PCI_EXP_LNKSTA 0x12 /* Link Status */
+#define PCI_EXP_LNKSTA_SPEED 0x000f /* Negotiated Link Speed */
+#define PCI_EXP_LNKSTA_WIDTH 0x03f0 /* Negotiated Link Width */
+#define PCI_EXP_LNKSTA_TR_ERR 0x0400 /* Training Error (obsolete) */
+#define PCI_EXP_LNKSTA_TRAIN 0x0800 /* Link Training */
+#define PCI_EXP_LNKSTA_SL_CLK 0x1000 /* Slot Clock Configuration */
+#define PCI_EXP_LNKSTA_DL_ACT 0x2000 /* Data Link Layer in DL_Active State */
+#define PCI_EXP_LNKSTA_BWMGMT 0x4000 /* Bandwidth Mgmt Status */
+#define PCI_EXP_LNKSTA_AUTBW 0x8000 /* Autonomous Bandwidth Mgmt Status */
+#define PCI_EXP_SLTCAP 0x14 /* Slot Capabilities */
+#define PCI_EXP_SLTCAP_ATNB 0x0001 /* Attention Button Present */
+#define PCI_EXP_SLTCAP_PWRC 0x0002 /* Power Controller Present */
+#define PCI_EXP_SLTCAP_MRL 0x0004 /* MRL Sensor Present */
+#define PCI_EXP_SLTCAP_ATNI 0x0008 /* Attention Indicator Present */
+#define PCI_EXP_SLTCAP_PWRI 0x0010 /* Power Indicator Present */
+#define PCI_EXP_SLTCAP_HPS 0x0020 /* Hot-Plug Surprise */
+#define PCI_EXP_SLTCAP_HPC 0x0040 /* Hot-Plug Capable */
+#define PCI_EXP_SLTCAP_PWR_VAL 0x00007f80 /* Slot Power Limit Value */
+#define PCI_EXP_SLTCAP_PWR_SCL 0x00018000 /* Slot Power Limit Scale */
+#define PCI_EXP_SLTCAP_INTERLOCK 0x020000 /* Electromechanical Interlock Present */
+#define PCI_EXP_SLTCAP_NOCMDCOMP 0x040000 /* No Command Completed Support */
+#define PCI_EXP_SLTCAP_PSN 0xfff80000 /* Physical Slot Number */
+#define PCI_EXP_SLTCTL 0x18 /* Slot Control */
+#define PCI_EXP_SLTCTL_ATNB 0x0001 /* Attention Button Pressed Enable */
+#define PCI_EXP_SLTCTL_PWRF 0x0002 /* Power Fault Detected Enable */
+#define PCI_EXP_SLTCTL_MRLS 0x0004 /* MRL Sensor Changed Enable */
+#define PCI_EXP_SLTCTL_PRSD 0x0008 /* Presence Detect Changed Enable */
+#define PCI_EXP_SLTCTL_CMDC 0x0010 /* Command Completed Interrupt Enable */
+#define PCI_EXP_SLTCTL_HPIE 0x0020 /* Hot-Plug Interrupt Enable */
+#define PCI_EXP_SLTCTL_ATNI 0x00c0 /* Attention Indicator Control */
+#define PCI_EXP_SLTCTL_PWRI 0x0300 /* Power Indicator Control */
+#define PCI_EXP_SLTCTL_PWRC 0x0400 /* Power Controller Control */
+#define PCI_EXP_SLTCTL_INTERLOCK 0x0800 /* Electromechanical Interlock Control */
+#define PCI_EXP_SLTCTL_LLCHG 0x1000 /* Data Link Layer State Changed Enable */
+#define PCI_EXP_SLTSTA 0x1a /* Slot Status */
+#define PCI_EXP_SLTSTA_ATNB 0x0001 /* Attention Button Pressed */
+#define PCI_EXP_SLTSTA_PWRF 0x0002 /* Power Fault Detected */
+#define PCI_EXP_SLTSTA_MRLS 0x0004 /* MRL Sensor Changed */
+#define PCI_EXP_SLTSTA_PRSD 0x0008 /* Presence Detect Changed */
+#define PCI_EXP_SLTSTA_CMDC 0x0010 /* Command Completed */
+#define PCI_EXP_SLTSTA_MRL_ST 0x0020 /* MRL Sensor State */
+#define PCI_EXP_SLTSTA_PRES 0x0040 /* Presence Detect State */
+#define PCI_EXP_SLTSTA_INTERLOCK 0x0080 /* Electromechanical Interlock Status */
+#define PCI_EXP_SLTSTA_LLCHG 0x0100 /* Data Link Layer State Changed */
+#define PCI_EXP_RTCTL 0x1c /* Root Control */
+#define PCI_EXP_RTCTL_SECEE 0x0001 /* System Error on Correctable Error */
+#define PCI_EXP_RTCTL_SENFEE 0x0002 /* System Error on Non-Fatal Error */
+#define PCI_EXP_RTCTL_SEFEE 0x0004 /* System Error on Fatal Error */
+#define PCI_EXP_RTCTL_PMEIE 0x0008 /* PME Interrupt Enable */
+#define PCI_EXP_RTCTL_CRSVIS 0x0010 /* Configuration Request Retry Status Visible to SW */
+#define PCI_EXP_RTCAP 0x1e /* Root Capabilities */
+#define PCI_EXP_RTCAP_CRSVIS 0x0010 /* Configuration Request Retry Status Visible to SW */
+#define PCI_EXP_RTSTA 0x20 /* Root Status */
+#define PCI_EXP_RTSTA_PME_REQID 0x0000ffff /* PME Requester ID */
+#define PCI_EXP_RTSTA_PME_STATUS 0x00010000 /* PME Status */
+#define PCI_EXP_RTSTA_PME_PENDING 0x00020000 /* PME is Pending */
+#define PCI_EXP_DEVCAP2 0x24 /* Device Capabilities 2 */
+#define PCI_EXP_DEVCTL2 0x28 /* Device Control 2 */
+#define PCI_EXP_DEV2_TIMEOUT_RANGE(x) ((x) & 0xf) /* Completion Timeout Ranges Supported */
+#define PCI_EXP_DEV2_TIMEOUT_VALUE(x) ((x) & 0xf) /* Completion Timeout Value */
+#define PCI_EXP_DEV2_TIMEOUT_DIS 0x0010 /* Completion Timeout Disable Supported */
+#define PCI_EXP_DEV2_ARI 0x0020 /* ARI Forwarding */
+#define PCI_EXP_DEVSTA2 0x2a /* Device Status 2 */
+#define PCI_EXP_LNKCAP2 0x2c /* Link Capabilities 2 */
+#define PCI_EXP_LNKCTL2 0x30 /* Link Control 2 */
+#define PCI_EXP_LNKCTL2_SPEED(x) ((x) & 0xf) /* Target Link Speed */
+#define PCI_EXP_LNKCTL2_CMPLNC 0x0010 /* Enter Compliance */
+#define PCI_EXP_LNKCTL2_SPEED_DIS 0x0020 /* Hardware Autonomous Speed Disable */
+#define PCI_EXP_LNKCTL2_DEEMPHASIS(x) (((x) >> 6) & 1) /* Selectable De-emphasis */
+#define PCI_EXP_LNKCTL2_MARGIN(x) (((x) >> 7) & 7) /* Transmit Margin */
+#define PCI_EXP_LNKCTL2_MOD_CMPLNC 0x0400 /* Enter Modified Compliance */
+#define PCI_EXP_LNKCTL2_CMPLNC_SOS 0x0800 /* Compliance SOS */
+#define PCI_EXP_LNKCTL2_COM_DEEMPHASIS(x) (((x) >> 12) & 1) /* Compliance De-emphasis */
+#define PCI_EXP_LNKSTA2 0x32 /* Link Status 2 */
+#define PCI_EXP_LINKSTA2_DEEMPHASIS(x) ((x) & 1) /* Current De-emphasis Level */
+#define PCI_EXP_SLTCAP2 0x34 /* Slot Capabilities 2 */
+#define PCI_EXP_SLTCTL2 0x38 /* Slot Control 2 */
+#define PCI_EXP_SLTSTA2 0x3a /* Slot Status 2 */
+
+/* MSI-X */
+#define PCI_MSIX_ENABLE 0x8000 /* MSI-X Enable (in message control) */
+#define PCI_MSIX_MASK 0x4000 /* Function Mask */
+#define PCI_MSIX_TABSIZE 0x07ff /* Table Size (encoded as N-1) */
+#define PCI_MSIX_TABLE 4 /* Table Offset / BIR register */
+#define PCI_MSIX_PBA 8 /* Pending Bit Array Offset / BIR register */
+#define PCI_MSIX_BIR 0x7 /* BAR Indicator (BIR) mask */
+
+/* Subsystem vendor/device ID for PCI bridges */
+#define PCI_SSVID_VENDOR 4 /* Subsystem Vendor ID register */
+#define PCI_SSVID_DEVICE 6 /* Subsystem Device ID register */
+
+/* PCI Advanced Features */
+#define PCI_AF_CAP 3 /* AF Capabilities register */
+#define PCI_AF_CAP_TP 0x01 /* Transactions Pending supported */
+#define PCI_AF_CAP_FLR 0x02 /* Function-Level Reset supported */
+#define PCI_AF_CTRL 4 /* AF Control register */
+#define PCI_AF_CTRL_FLR 0x01 /* Initiate Function-Level Reset */
+#define PCI_AF_STATUS 5 /* AF Status register */
+#define PCI_AF_STATUS_TP 0x01 /* Transactions Pending */
+
+/* SATA Host Bus Adapter */
+#define PCI_SATA_HBA_BARS 4 /* BAR Location register */
+#define PCI_SATA_HBA_REG0 8 /* First HBA register */
+
+/*** Definitions of extended capabilities ***/
+
+/* Advanced Error Reporting */
+#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */
+#define PCI_ERR_UNC_TRAIN 0x00000001 /* Undefined in PCIe rev1.1 & 2.0 spec */
+#define PCI_ERR_UNC_DLP 0x00000010 /* Data Link Protocol */
+#define PCI_ERR_UNC_SDES 0x00000020 /* Surprise Down Error */
+#define PCI_ERR_UNC_POISON_TLP 0x00001000 /* Poisoned TLP */
+#define PCI_ERR_UNC_FCP 0x00002000 /* Flow Control Protocol */
+#define PCI_ERR_UNC_COMP_TIME 0x00004000 /* Completion Timeout */
+#define PCI_ERR_UNC_COMP_ABORT 0x00008000 /* Completer Abort */
+#define PCI_ERR_UNC_UNX_COMP 0x00010000 /* Unexpected Completion */
+#define PCI_ERR_UNC_RX_OVER 0x00020000 /* Receiver Overflow */
+#define PCI_ERR_UNC_MALF_TLP 0x00040000 /* Malformed TLP */
+#define PCI_ERR_UNC_ECRC 0x00080000 /* ECRC Error Status */
+#define PCI_ERR_UNC_UNSUP 0x00100000 /* Unsupported Request */
+#define PCI_ERR_UNC_ACS_VIOL 0x00200000 /* ACS Violation */
+#define PCI_ERR_UNCOR_MASK 8 /* Uncorrectable Error Mask */
+ /* Same bits as above */
+#define PCI_ERR_UNCOR_SEVER 12 /* Uncorrectable Error Severity */
+ /* Same bits as above */
+#define PCI_ERR_COR_STATUS 16 /* Correctable Error Status */
+#define PCI_ERR_COR_RCVR 0x00000001 /* Receiver Error Status */
+#define PCI_ERR_COR_BAD_TLP 0x00000040 /* Bad TLP Status */
+#define PCI_ERR_COR_BAD_DLLP 0x00000080 /* Bad DLLP Status */
+#define PCI_ERR_COR_REP_ROLL 0x00000100 /* REPLAY_NUM Rollover */
+#define PCI_ERR_COR_REP_TIMER 0x00001000 /* Replay Timer Timeout */
+#define PCI_ERR_COR_REP_ANFE 0x00002000 /* Advisory Non-Fatal Error */
+#define PCI_ERR_COR_MASK 20 /* Correctable Error Mask */
+ /* Same bits as above */
+#define PCI_ERR_CAP 24 /* Advanced Error Capabilities */
+#define PCI_ERR_CAP_FEP(x) ((x) & 31) /* First Error Pointer */
+#define PCI_ERR_CAP_ECRC_GENC 0x00000020 /* ECRC Generation Capable */
+#define PCI_ERR_CAP_ECRC_GENE 0x00000040 /* ECRC Generation Enable */
+#define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */
+#define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */
+#define PCI_ERR_HEADER_LOG 28 /* Header Log Register (16 bytes) */
+#define PCI_ERR_ROOT_COMMAND 44 /* Root Error Command */
+#define PCI_ERR_ROOT_STATUS 48 /* Root Error Status */
+#define PCI_ERR_ROOT_COR_SRC 52 /* Correctable Error Source ID */
+#define PCI_ERR_ROOT_SRC 54 /* Error Source ID */
+
+/* Virtual Channel */
+#define PCI_VC_PORT_REG1 4 /* Port VC Capability Register 1 */
+#define PCI_VC_PORT_REG2 8 /* Port VC Capability Register 2 */
+#define PCI_VC_PORT_CTRL 12 /* Port VC Control Register */
+#define PCI_VC_PORT_STATUS 14 /* Port VC Status Register */
+#define PCI_VC_RES_CAP 16 /* VC Resource Capability Register */
+#define PCI_VC_RES_CTRL 20 /* VC Resource Control Register */
+#define PCI_VC_RES_STATUS 26 /* VC Resource Status Register */
+
+/* Power Budgeting */
+#define PCI_PWR_DSR 4 /* Data Select Register */
+#define PCI_PWR_DATA 8 /* Data Register */
+#define PCI_PWR_DATA_BASE(x) ((x) & 0xff) /* Base Power */
+#define PCI_PWR_DATA_SCALE(x) (((x) >> 8) & 3) /* Data Scale */
+#define PCI_PWR_DATA_PM_SUB(x) (((x) >> 10) & 7) /* PM Sub State */
+#define PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */
+#define PCI_PWR_DATA_TYPE(x) (((x) >> 15) & 7) /* Type */
+#define PCI_PWR_DATA_RAIL(x) (((x) >> 18) & 7) /* Power Rail */
+#define PCI_PWR_CAP 12 /* Capability */
+#define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */
+
+/* Root Complex Link */
+#define PCI_RCLINK_ESD 4 /* Element Self Description */
+#define PCI_RCLINK_LINK1 16 /* First Link Entry */
+#define PCI_RCLINK_LINK_DESC 0 /* Link Entry: Description */
+#define PCI_RCLINK_LINK_ADDR 8 /* Link Entry: Address (64-bit) */
+#define PCI_RCLINK_LINK_SIZE 16 /* Link Entry: sizeof */
+
+/* PCIe Vendor-Specific Capability */
+#define PCI_EVNDR_HEADER 4 /* Vendor-Specific Header */
+#define PCI_EVNDR_REGISTERS 8 /* Vendor-Specific Registers */
+
+/* Access Control Services */
+#define PCI_ACS_CAP 0x04 /* ACS Capability Register */
+#define PCI_ACS_CAP_VALID 0x0001 /* ACS Source Validation */
+#define PCI_ACS_CAP_BLOCK 0x0002 /* ACS Translation Blocking */
+#define PCI_ACS_CAP_REQ_RED 0x0004 /* ACS P2P Request Redirect */
+#define PCI_ACS_CAP_CMPLT_RED 0x0008 /* ACS P2P Completion Redirect */
+#define PCI_ACS_CAP_FORWARD 0x0010 /* ACS Upstream Forwarding */
+#define PCI_ACS_CAP_EGRESS 0x0020 /* ACS P2P Egress Control */
+#define PCI_ACS_CAP_TRANS 0x0040 /* ACS Direct Translated P2P */
+#define PCI_ACS_CAP_VECTOR(x) (((x) >> 8) & 0xff) /* Egress Control Vector Size */
+#define PCI_ACS_CTRL 0x06 /* ACS Control Register */
+#define PCI_ACS_CTRL_VALID 0x0001 /* ACS Source Validation Enable */
+#define PCI_ACS_CTRL_BLOCK 0x0002 /* ACS Translation Blocking Enable */
+#define PCI_ACS_CTRL_REQ_RED 0x0004 /* ACS P2P Request Redirect Enable */
+#define PCI_ACS_CTRL_CMPLT_RED 0x0008 /* ACS P2P Completion Redirect Enable */
+#define PCI_ACS_CTRL_FORWARD 0x0010 /* ACS Upstream Forwarding Enable */
+#define PCI_ACS_CTRL_EGRESS 0x0020 /* ACS P2P Egress Control Enable */
+#define PCI_ACS_CTRL_TRANS 0x0040 /* ACS Direct Translated P2P Enable */
+#define PCI_ACS_EGRESS_CTRL 0x08 /* Egress Control Vector */
+
+/* Alternative Routing-ID Interpretation */
+#define PCI_ARI_CAP 0x04 /* ARI Capability Register */
+#define PCI_ARI_CAP_MFVC 0x0001 /* MFVC Function Groups Capability */
+#define PCI_ARI_CAP_ACS 0x0002 /* ACS Function Groups Capability */
+#define PCI_ARI_CAP_NFN(x) (((x) >> 8) & 0xff) /* Next Function Number */
+#define PCI_ARI_CTRL 0x06 /* ARI Control Register */
+#define PCI_ARI_CTRL_MFVC 0x0001 /* MFVC Function Groups Enable */
+#define PCI_ARI_CTRL_ACS 0x0002 /* ACS Function Groups Enable */
+#define PCI_ARI_CTRL_FG(x) (((x) >> 4) & 7) /* Function Group */
+
+/* Address Translation Service */
+#define PCI_ATS_CAP 0x04 /* ATS Capability Register */
+#define PCI_ATS_CAP_IQD(x) ((x) & 0x1f) /* Invalidate Queue Depth */
+#define PCI_ATS_CTRL 0x06 /* ATS Control Register */
+#define PCI_ATS_CTRL_STU(x) ((x) & 0x1f) /* Smallest Translation Unit */
+#define PCI_ATS_CTRL_ENABLE 0x8000 /* ATS Enable */
+
+/* Single Root I/O Virtualization */
+#define PCI_IOV_CAP 0x04 /* SR-IOV Capability Register */
+#define PCI_IOV_CAP_VFM 0x00000001 /* VF Migration Capable */
+#define PCI_IOV_CAP_IMN(x) ((x) >> 21) /* VF Migration Interrupt Message Number */
+#define PCI_IOV_CTRL 0x08 /* SR-IOV Control Register */
+#define PCI_IOV_CTRL_VFE 0x0001 /* VF Enable */
+#define PCI_IOV_CTRL_VFME 0x0002 /* VF Migration Enable */
+#define PCI_IOV_CTRL_VFMIE 0x0004 /* VF Migration Interrupt Enable */
+#define PCI_IOV_CTRL_MSE 0x0008 /* VF MSE */
+#define PCI_IOV_CTRL_ARI 0x0010 /* ARI Capable Hierarchy */
+#define PCI_IOV_STATUS 0x0a /* SR-IOV Status Register */
+#define PCI_IOV_STATUS_MS 0x0001 /* VF Migration Status */
+#define PCI_IOV_INITIALVF 0x0c /* Number of VFs that are initially associated */
+#define PCI_IOV_TOTALVF 0x0e /* Maximum number of VFs that could be associated */
+#define PCI_IOV_NUMVF 0x10 /* Number of VFs that are available */
+#define PCI_IOV_FDL 0x12 /* Function Dependency Link */
+#define PCI_IOV_OFFSET 0x14 /* First VF Offset */
+#define PCI_IOV_STRIDE 0x16 /* Routing ID offset from one VF to the next one */
+#define PCI_IOV_DID 0x1a /* VF Device ID */
+#define PCI_IOV_SUPPS 0x1c /* Supported Page Sizes */
+#define PCI_IOV_SYSPS 0x20 /* System Page Size */
+#define PCI_IOV_BAR_BASE 0x24 /* VF BAR0, VF BAR1, ... VF BAR5 */
+#define PCI_IOV_NUM_BAR 6 /* Number of VF BARs */
+#define PCI_IOV_MSAO 0x3c /* VF Migration State Array Offset */
+#define PCI_IOV_MSA_BIR(x) ((x) & 7) /* VF Migration State BIR */
+#define PCI_IOV_MSA_OFFSET(x) ((x) & 0xfffffff8) /* VF Migration State Offset */
+
+/* Transaction Processing Hints */
+#define PCI_TPH_CAPABILITIES 4
+#define PCI_TPH_INTVEC_SUP (1<<1) /* Supports interrupt vector mode */
+#define PCI_TPH_DEV_SUP (1<<2) /* Device specific mode supported */
+#define PCI_TPH_EXT_REQ_SUP (1<<8) /* Supports extended requests */
+#define PCI_TPH_ST_LOC_MASK (3<<9) /* Steering table location bits */
+#define PCI_TPH_ST_NONE (0<<9) /* No steering table */
+#define PCI_TPH_ST_CAP (1<<9) /* Steering table in TPH cap */
+#define PCI_TPH_ST_MSIX (2<<9) /* Steering table in MSI-X table */
+#define PCI_TPH_ST_SIZE_SHIFT (16) /* Encoded as size - 1 */
+
+/* Latency Tolerance Reporting */
+#define PCI_LTR_MAX_SNOOP 4 /* 16 bit value */
+#define PCI_LTR_VALUE_MASK (0x3ff)
+#define PCI_LTR_SCALE_SHIFT (10)
+#define PCI_LTR_SCALE_MASK (7)
+#define PCI_LTR_MAX_NOSNOOP 6 /* 16 bit value */
+
+/*
+ * The PCI interface treats multi-function devices as independent
+ * devices. The slot/function address of each device is encoded
+ * in a single byte as follows:
+ *
+ * 7:3 = slot
+ * 2:0 = function
+ */
+#define PCI_DEVFN(slot,func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
+#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)
+#define PCI_FUNC(devfn) ((devfn) & 0x07)
+
+/* Device classes and subclasses */
+
+#define PCI_CLASS_NOT_DEFINED 0x0000
+#define PCI_CLASS_NOT_DEFINED_VGA 0x0001
+
+#define PCI_BASE_CLASS_STORAGE 0x01
+#define PCI_CLASS_STORAGE_SCSI 0x0100
+#define PCI_CLASS_STORAGE_IDE 0x0101
+#define PCI_CLASS_STORAGE_FLOPPY 0x0102
+#define PCI_CLASS_STORAGE_IPI 0x0103
+#define PCI_CLASS_STORAGE_RAID 0x0104
+#define PCI_CLASS_STORAGE_ATA 0x0105
+#define PCI_CLASS_STORAGE_SATA 0x0106
+#define PCI_CLASS_STORAGE_SAS 0x0107
+#define PCI_CLASS_STORAGE_OTHER 0x0180
+
+#define PCI_BASE_CLASS_NETWORK 0x02
+#define PCI_CLASS_NETWORK_ETHERNET 0x0200
+#define PCI_CLASS_NETWORK_TOKEN_RING 0x0201
+#define PCI_CLASS_NETWORK_FDDI 0x0202
+#define PCI_CLASS_NETWORK_ATM 0x0203
+#define PCI_CLASS_NETWORK_ISDN 0x0204
+#define PCI_CLASS_NETWORK_OTHER 0x0280
+
+#define PCI_BASE_CLASS_DISPLAY 0x03
+#define PCI_CLASS_DISPLAY_VGA 0x0300
+#define PCI_CLASS_DISPLAY_XGA 0x0301
+#define PCI_CLASS_DISPLAY_3D 0x0302
+#define PCI_CLASS_DISPLAY_OTHER 0x0380
+
+#define PCI_BASE_CLASS_MULTIMEDIA 0x04
+#define PCI_CLASS_MULTIMEDIA_VIDEO 0x0400
+#define PCI_CLASS_MULTIMEDIA_AUDIO 0x0401
+#define PCI_CLASS_MULTIMEDIA_PHONE 0x0402
+#define PCI_CLASS_MULTIMEDIA_AUDIO_DEV 0x0403
+#define PCI_CLASS_MULTIMEDIA_OTHER 0x0480
+
+#define PCI_BASE_CLASS_MEMORY 0x05
+#define PCI_CLASS_MEMORY_RAM 0x0500
+#define PCI_CLASS_MEMORY_FLASH 0x0501
+#define PCI_CLASS_MEMORY_OTHER 0x0580
+
+#define PCI_BASE_CLASS_BRIDGE 0x06
+#define PCI_CLASS_BRIDGE_HOST 0x0600
+#define PCI_CLASS_BRIDGE_ISA 0x0601
+#define PCI_CLASS_BRIDGE_EISA 0x0602
+#define PCI_CLASS_BRIDGE_MC 0x0603
+#define PCI_CLASS_BRIDGE_PCI 0x0604
+#define PCI_CLASS_BRIDGE_PCMCIA 0x0605
+#define PCI_CLASS_BRIDGE_NUBUS 0x0606
+#define PCI_CLASS_BRIDGE_CARDBUS 0x0607
+#define PCI_CLASS_BRIDGE_RACEWAY 0x0608
+#define PCI_CLASS_BRIDGE_PCI_SEMI 0x0609
+#define PCI_CLASS_BRIDGE_IB_TO_PCI 0x060a
+#define PCI_CLASS_BRIDGE_OTHER 0x0680
+
+#define PCI_BASE_CLASS_COMMUNICATION 0x07
+#define PCI_CLASS_COMMUNICATION_SERIAL 0x0700
+#define PCI_CLASS_COMMUNICATION_PARALLEL 0x0701
+#define PCI_CLASS_COMMUNICATION_MSERIAL 0x0702
+#define PCI_CLASS_COMMUNICATION_MODEM 0x0703
+#define PCI_CLASS_COMMUNICATION_OTHER 0x0780
+
+#define PCI_BASE_CLASS_SYSTEM 0x08
+#define PCI_CLASS_SYSTEM_PIC 0x0800
+#define PCI_CLASS_SYSTEM_DMA 0x0801
+#define PCI_CLASS_SYSTEM_TIMER 0x0802
+#define PCI_CLASS_SYSTEM_RTC 0x0803
+#define PCI_CLASS_SYSTEM_PCI_HOTPLUG 0x0804
+#define PCI_CLASS_SYSTEM_OTHER 0x0880
+
+#define PCI_BASE_CLASS_INPUT 0x09
+#define PCI_CLASS_INPUT_KEYBOARD 0x0900
+#define PCI_CLASS_INPUT_PEN 0x0901
+#define PCI_CLASS_INPUT_MOUSE 0x0902
+#define PCI_CLASS_INPUT_SCANNER 0x0903
+#define PCI_CLASS_INPUT_GAMEPORT 0x0904
+#define PCI_CLASS_INPUT_OTHER 0x0980
+
+#define PCI_BASE_CLASS_DOCKING 0x0a
+#define PCI_CLASS_DOCKING_GENERIC 0x0a00
+#define PCI_CLASS_DOCKING_OTHER 0x0a80
+
+#define PCI_BASE_CLASS_PROCESSOR 0x0b
+#define PCI_CLASS_PROCESSOR_386 0x0b00
+#define PCI_CLASS_PROCESSOR_486 0x0b01
+#define PCI_CLASS_PROCESSOR_PENTIUM 0x0b02
+#define PCI_CLASS_PROCESSOR_ALPHA 0x0b10
+#define PCI_CLASS_PROCESSOR_POWERPC 0x0b20
+#define PCI_CLASS_PROCESSOR_MIPS 0x0b30
+#define PCI_CLASS_PROCESSOR_CO 0x0b40
+
+#define PCI_BASE_CLASS_SERIAL 0x0c
+#define PCI_CLASS_SERIAL_FIREWIRE 0x0c00
+#define PCI_CLASS_SERIAL_ACCESS 0x0c01
+#define PCI_CLASS_SERIAL_SSA 0x0c02
+#define PCI_CLASS_SERIAL_USB 0x0c03
+#define PCI_CLASS_SERIAL_FIBER 0x0c04
+#define PCI_CLASS_SERIAL_SMBUS 0x0c05
+#define PCI_CLASS_SERIAL_INFINIBAND 0x0c06
+
+#define PCI_BASE_CLASS_WIRELESS 0x0d
+#define PCI_CLASS_WIRELESS_IRDA 0x0d00
+#define PCI_CLASS_WIRELESS_CONSUMER_IR 0x0d01
+#define PCI_CLASS_WIRELESS_RF 0x0d10
+#define PCI_CLASS_WIRELESS_OTHER 0x0d80
+
+#define PCI_BASE_CLASS_INTELLIGENT 0x0e
+#define PCI_CLASS_INTELLIGENT_I2O 0x0e00
+
+#define PCI_BASE_CLASS_SATELLITE 0x0f
+#define PCI_CLASS_SATELLITE_TV 0x0f00
+#define PCI_CLASS_SATELLITE_AUDIO 0x0f01
+#define PCI_CLASS_SATELLITE_VOICE 0x0f03
+#define PCI_CLASS_SATELLITE_DATA 0x0f04
+
+#define PCI_BASE_CLASS_CRYPT 0x10
+#define PCI_CLASS_CRYPT_NETWORK 0x1000
+#define PCI_CLASS_CRYPT_ENTERTAINMENT 0x1010
+#define PCI_CLASS_CRYPT_OTHER 0x1080
+
+#define PCI_BASE_CLASS_SIGNAL 0x11
+#define PCI_CLASS_SIGNAL_DPIO 0x1100
+#define PCI_CLASS_SIGNAL_PERF_CTR 0x1101
+#define PCI_CLASS_SIGNAL_SYNCHRONIZER 0x1110
+#define PCI_CLASS_SIGNAL_OTHER 0x1180
+
+#define PCI_CLASS_OTHERS 0xff
+
+/* Several ID's we need in the library */
+
+#define PCI_VENDOR_ID_INTEL 0x8086
+#define PCI_VENDOR_ID_COMPAQ 0x0e11
diff --git a/ext/hwloc/include/pci/pci.h b/ext/hwloc/include/pci/pci.h
new file mode 100644
index 000000000..7a5a6b80c
--- /dev/null
+++ b/ext/hwloc/include/pci/pci.h
@@ -0,0 +1,240 @@
+/*
+ * The PCI Library
+ *
+ * Copyright (c) 1997--2009 Martin Mares <mj@ucw.cz>
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#ifndef _PCI_LIB_H
+#define _PCI_LIB_H
+
+#ifndef PCI_CONFIG_H
+#include "config.h"
+#endif
+
+#include "header.h"
+#include "types.h"
+
+#define PCI_LIB_VERSION 0x030100
+
+#ifndef PCI_ABI
+#define PCI_ABI
+#endif
+
+/*
+ * PCI Access Structure
+ */
+
+struct pci_methods;
+
+enum pci_access_type {
+ /* Known access methods, remember to update access.c as well */
+ PCI_ACCESS_AUTO, /* Autodetection */
+ PCI_ACCESS_SYS_BUS_PCI, /* Linux /sys/bus/pci */
+ PCI_ACCESS_PROC_BUS_PCI, /* Linux /proc/bus/pci */
+ PCI_ACCESS_I386_TYPE1, /* i386 ports, type 1 */
+ PCI_ACCESS_I386_TYPE2, /* i386 ports, type 2 */
+ PCI_ACCESS_FBSD_DEVICE, /* FreeBSD /dev/pci */
+ PCI_ACCESS_AIX_DEVICE, /* /dev/pci0, /dev/bus0, etc. */
+ PCI_ACCESS_NBSD_LIBPCI, /* NetBSD libpci */
+ PCI_ACCESS_OBSD_DEVICE, /* OpenBSD /dev/pci */
+ PCI_ACCESS_DUMP, /* Dump file */
+ PCI_ACCESS_MAX
+};
+
+struct pci_access {
+ /* Options you can change: */
+ unsigned int method; /* Access method */
+ int writeable; /* Open in read/write mode */
+ int buscentric; /* Bus-centric view of the world */
+
+ char *id_file_name; /* Name of ID list file (use pci_set_name_list_path()) */
+ int free_id_name; /* Set if id_file_name is malloced */
+ int numeric_ids; /* Enforce PCI_LOOKUP_NUMERIC (>1 => PCI_LOOKUP_MIXED) */
+
+ unsigned int id_lookup_mode; /* pci_lookup_mode flags which are set automatically */
+ /* Default: PCI_LOOKUP_CACHE */
+
+ int debugging; /* Turn on debugging messages */
+
+ /* Functions you can override: */
+ void (*error)(char *msg, ...) PCI_PRINTF(1,2); /* Write error message and quit */
+ void (*warning)(char *msg, ...) PCI_PRINTF(1,2); /* Write a warning message */
+ void (*debug)(char *msg, ...) PCI_PRINTF(1,2); /* Write a debugging message */
+
+ struct pci_dev *devices; /* Devices found on this bus */
+
+ /* Fields used internally: */
+ struct pci_methods *methods;
+ struct pci_param *params;
+ struct id_entry **id_hash; /* names.c */
+ struct id_bucket *current_id_bucket;
+ int id_load_failed;
+ int id_cache_status; /* 0=not read, 1=read, 2=dirty */
+ int fd; /* proc/sys: fd for config space */
+ int fd_rw; /* proc/sys: fd opened read-write */
+ int fd_pos; /* proc/sys: current position */
+ int fd_vpd; /* sys: fd for VPD */
+ struct pci_dev *cached_dev; /* proc/sys: device the fds are for */
+};
+
+/* Initialize PCI access */
+struct pci_access *pci_alloc(void) PCI_ABI;
+void pci_init(struct pci_access *) PCI_ABI;
+void pci_cleanup(struct pci_access *) PCI_ABI;
+
+/* Scanning of devices */
+void pci_scan_bus(struct pci_access *acc) PCI_ABI;
+struct pci_dev *pci_get_dev(struct pci_access *acc, int domain, int bus, int dev, int func) PCI_ABI; /* Raw access to specified device */
+void pci_free_dev(struct pci_dev *) PCI_ABI;
+
+/* Names of access methods */
+int pci_lookup_method(char *name) PCI_ABI; /* Returns -1 if not found */
+char *pci_get_method_name(int index) PCI_ABI; /* Returns "" if unavailable, NULL if index out of range */
+
+/*
+ * Named parameters
+ */
+
+struct pci_param {
+ struct pci_param *next; /* Please use pci_walk_params() for traversing the list */
+ char *param; /* Name of the parameter */
+ char *value; /* Value of the parameter */
+ int value_malloced; /* used internally */
+ char *help; /* Explanation of the parameter */
+};
+
+char *pci_get_param(struct pci_access *acc, char *param) PCI_ABI;
+int pci_set_param(struct pci_access *acc, char *param, char *value) PCI_ABI; /* 0 on success, -1 if no such parameter */
+/* To traverse the list, call pci_walk_params repeatedly, first with prev=NULL, and do not modify the parameters during traversal. */
+struct pci_param *pci_walk_params(struct pci_access *acc, struct pci_param *prev) PCI_ABI;
+
+/*
+ * Devices
+ */
+
+struct pci_dev {
+ struct pci_dev *next; /* Next device in the chain */
+ u16 domain; /* PCI domain (host bridge) */
+ u8 bus, dev, func; /* Bus inside domain, device and function */
+
+ /* These fields are set by pci_fill_info() */
+ int known_fields; /* Set of info fields already known */
+ u16 vendor_id, device_id; /* Identity of the device */
+ u16 device_class; /* PCI device class */
+ int irq; /* IRQ number */
+ pciaddr_t base_addr[6]; /* Base addresses including flags in lower bits */
+ pciaddr_t size[6]; /* Region sizes */
+ pciaddr_t rom_base_addr; /* Expansion ROM base address */
+ pciaddr_t rom_size; /* Expansion ROM size */
+ struct pci_cap *first_cap; /* List of capabilities */
+ char *phy_slot; /* Physical slot */
+
+ /* Fields used internally: */
+ struct pci_access *access;
+ struct pci_methods *methods;
+ u8 *cache; /* Cached config registers */
+ int cache_len;
+ int hdrtype; /* Cached low 7 bits of header type, -1 if unknown */
+ void *aux; /* Auxillary data */
+};
+
+#define PCI_ADDR_IO_MASK (~(pciaddr_t) 0x3)
+#define PCI_ADDR_MEM_MASK (~(pciaddr_t) 0xf)
+#define PCI_ADDR_FLAG_MASK 0xf
+
+u8 pci_read_byte(struct pci_dev *, int pos) PCI_ABI; /* Access to configuration space */
+u16 pci_read_word(struct pci_dev *, int pos) PCI_ABI;
+u32 pci_read_long(struct pci_dev *, int pos) PCI_ABI;
+int pci_read_block(struct pci_dev *, int pos, u8 *buf, int len) PCI_ABI;
+int pci_read_vpd(struct pci_dev *d, int pos, u8 *buf, int len) PCI_ABI;
+int pci_write_byte(struct pci_dev *, int pos, u8 data) PCI_ABI;
+int pci_write_word(struct pci_dev *, int pos, u16 data) PCI_ABI;
+int pci_write_long(struct pci_dev *, int pos, u32 data) PCI_ABI;
+int pci_write_block(struct pci_dev *, int pos, u8 *buf, int len) PCI_ABI;
+
+int pci_fill_info(struct pci_dev *, int flags) PCI_ABI; /* Fill in device information */
+
+#define PCI_FILL_IDENT 1
+#define PCI_FILL_IRQ 2
+#define PCI_FILL_BASES 4
+#define PCI_FILL_ROM_BASE 8
+#define PCI_FILL_SIZES 16
+#define PCI_FILL_CLASS 32
+#define PCI_FILL_CAPS 64
+#define PCI_FILL_EXT_CAPS 128
+#define PCI_FILL_PHYS_SLOT 256
+#define PCI_FILL_RESCAN 0x10000
+
+void pci_setup_cache(struct pci_dev *, u8 *cache, int len) PCI_ABI;
+
+/*
+ * Capabilities
+ */
+
+struct pci_cap {
+ struct pci_cap *next;
+ u16 id; /* PCI_CAP_ID_xxx */
+ u16 type; /* PCI_CAP_xxx */
+ unsigned int addr; /* Position in the config space */
+};
+
+#define PCI_CAP_NORMAL 1 /* Traditional PCI capabilities */
+#define PCI_CAP_EXTENDED 2 /* PCIe extended capabilities */
+
+struct pci_cap *pci_find_cap(struct pci_dev *, unsigned int id, unsigned int type) PCI_ABI;
+
+/*
+ * Filters
+ */
+
+struct pci_filter {
+ int domain, bus, slot, func; /* -1 = ANY */
+ int vendor, device;
+};
+
+void pci_filter_init(struct pci_access *, struct pci_filter *) PCI_ABI;
+char *pci_filter_parse_slot(struct pci_filter *, char *) PCI_ABI;
+char *pci_filter_parse_id(struct pci_filter *, char *) PCI_ABI;
+int pci_filter_match(struct pci_filter *, struct pci_dev *) PCI_ABI;
+
+/*
+ * Conversion of PCI ID's to names (according to the pci.ids file)
+ *
+ * Call pci_lookup_name() to identify different types of ID's:
+ *
+ * VENDOR (vendorID) -> vendor
+ * DEVICE (vendorID, deviceID) -> device
+ * VENDOR | DEVICE (vendorID, deviceID) -> combined vendor and device
+ * SUBSYSTEM | VENDOR (subvendorID) -> subsystem vendor
+ * SUBSYSTEM | DEVICE (vendorID, deviceID, subvendorID, subdevID) -> subsystem device
+ * SUBSYSTEM | VENDOR | DEVICE (vendorID, deviceID, subvendorID, subdevID) -> combined subsystem v+d
+ * SUBSYSTEM | ... (-1, -1, subvendorID, subdevID) -> generic subsystem
+ * CLASS (classID) -> class
+ * PROGIF (classID, progif) -> programming interface
+ */
+
+char *pci_lookup_name(struct pci_access *a, char *buf, int size, int flags, ...) PCI_ABI;
+
+int pci_load_name_list(struct pci_access *a) PCI_ABI; /* Called automatically by pci_lookup_*() when needed; returns success */
+void pci_free_name_list(struct pci_access *a) PCI_ABI; /* Called automatically by pci_cleanup() */
+void pci_set_name_list_path(struct pci_access *a, char *name, int to_be_freed) PCI_ABI;
+void pci_id_cache_flush(struct pci_access *a) PCI_ABI;
+
+enum pci_lookup_mode {
+ PCI_LOOKUP_VENDOR = 1, /* Vendor name (args: vendorID) */
+ PCI_LOOKUP_DEVICE = 2, /* Device name (args: vendorID, deviceID) */
+ PCI_LOOKUP_CLASS = 4, /* Device class (args: classID) */
+ PCI_LOOKUP_SUBSYSTEM = 8,
+ PCI_LOOKUP_PROGIF = 16, /* Programming interface (args: classID, prog_if) */
+ PCI_LOOKUP_NUMERIC = 0x10000, /* Want only formatted numbers; default if access->numeric_ids is set */
+ PCI_LOOKUP_NO_NUMBERS = 0x20000, /* Return NULL if not found in the database; default is to print numerically */
+ PCI_LOOKUP_MIXED = 0x40000, /* Include both numbers and names */
+ PCI_LOOKUP_NETWORK = 0x80000, /* Try to resolve unknown ID's by DNS */
+ PCI_LOOKUP_SKIP_LOCAL = 0x100000, /* Do not consult local database */
+ PCI_LOOKUP_CACHE = 0x200000, /* Consult the local cache before using DNS */
+ PCI_LOOKUP_REFRESH_CACHE = 0x400000, /* Forget all previously cached entries, but still allow updating the cache */
+};
+
+#endif
diff --git a/ext/hwloc/include/pci/types.h b/ext/hwloc/include/pci/types.h
new file mode 100644
index 000000000..4d23e692b
--- /dev/null
+++ b/ext/hwloc/include/pci/types.h
@@ -0,0 +1,65 @@
+/*
+ * The PCI Library -- Types and Format Strings
+ *
+ * Copyright (c) 1997--2008 Martin Mares <mj@ucw.cz>
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#include <sys/types.h>
+
+#ifndef PCI_HAVE_Uxx_TYPES
+
+#ifdef PCI_OS_WINDOWS
+#include <windows.h>
+typedef BYTE u8;
+typedef WORD u16;
+typedef DWORD u32;
+#elif defined(PCI_HAVE_STDINT_H) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
+#include <stdint.h>
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+#else
+typedef u_int8_t u8;
+typedef u_int16_t u16;
+typedef u_int32_t u32;
+#endif
+
+#ifdef PCI_HAVE_64BIT_ADDRESS
+#include <limits.h>
+#if ULONG_MAX > 0xffffffff
+typedef unsigned long u64;
+#define PCI_U64_FMT "l"
+#else
+typedef unsigned long long u64;
+#define PCI_U64_FMT "ll"
+#endif
+#endif
+
+#endif /* PCI_HAVE_Uxx_TYPES */
+
+#ifdef PCI_HAVE_64BIT_ADDRESS
+typedef u64 pciaddr_t;
+#define PCIADDR_T_FMT "%08" PCI_U64_FMT "x"
+#define PCIADDR_PORT_FMT "%04" PCI_U64_FMT "x"
+#else
+typedef u32 pciaddr_t;
+#define PCIADDR_T_FMT "%08x"
+#define PCIADDR_PORT_FMT "%04x"
+#endif
+
+#ifdef PCI_ARCH_SPARC64
+/* On sparc64 Linux the kernel reports remapped port addresses and IRQ numbers */
+#undef PCIADDR_PORT_FMT
+#define PCIADDR_PORT_FMT PCIADDR_T_FMT
+#define PCIIRQ_FMT "%08x"
+#else
+#define PCIIRQ_FMT "%d"
+#endif
+
+#if defined(__GNUC__) && __GNUC__ > 2
+#define PCI_PRINTF(x,y) __attribute__((format(printf, x, y)))
+#else
+#define PCI_PRINTF(x,y)
+#endif
diff --git a/ext/hwloc/include/private/autogen/config.h b/ext/hwloc/include/private/autogen/config.h
new file mode 100644
index 000000000..6f440d09b
--- /dev/null
+++ b/ext/hwloc/include/private/autogen/config.h
@@ -0,0 +1,684 @@
+/* include/private/autogen/config.h. Generated from config.h.in by configure. */
+/* include/private/autogen/config.h.in. Generated from configure.ac by autoheader. */
+
+/* -*- c -*-
+ *
+ * Copyright © 2009, 2011, 2012 CNRS, inria., Université Bordeaux 1 All rights reserved.
+ * Copyright © 2009 Cisco Systems, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ * This file is automatically generated by configure. Edits will be lost
+ * the next time you run configure!
+ */
+
+#ifndef HWLOC_CONFIGURE_H
+#define HWLOC_CONFIGURE_H
+
+
+/* Define to 1 if the system has the type `CACHE_DESCRIPTOR'. */
+/* #undef HAVE_CACHE_DESCRIPTOR */
+
+/* Define to 1 if the system has the type `CACHE_RELATIONSHIP'. */
+/* #undef HAVE_CACHE_RELATIONSHIP */
+
+/* Define to 1 if you have the `clz' function. */
+/* #undef HAVE_CLZ */
+
+/* Define to 1 if you have the `clzl' function. */
+/* #undef HAVE_CLZL */
+
+/* Define to 1 if you have the <CL/cl_ext.h> header file. */
+/* #undef HAVE_CL_CL_EXT_H */
+
+/* Define to 1 if you have the `cpuset_setaffinity' function. */
+/* #undef HAVE_CPUSET_SETAFFINITY */
+
+/* Define to 1 if you have the `cpuset_setid' function. */
+/* #undef HAVE_CPUSET_SETID */
+
+/* Define to 1 if we have -lcuda */
+/* #undef HAVE_CUDA */
+
+/* Define to 1 if you have the <cuda.h> header file. */
+/* #undef HAVE_CUDA_H */
+
+/* Define to 1 if you have the <cuda_runtime_api.h> header file. */
+/* #undef HAVE_CUDA_RUNTIME_API_H */
+
+/* Define to 1 if you have the declaration of `CL_DEVICE_TOPOLOGY_AMD', and to
+ 0 if you don't. */
+/* #undef HAVE_DECL_CL_DEVICE_TOPOLOGY_AMD */
+
+/* Define to 1 if you have the declaration of `CTL_HW', and to 0 if you don't.
+ */
+#define HAVE_DECL_CTL_HW 0
+
+/* Define to 1 if you have the declaration of `fabsf', and to 0 if you don't.
+ */
+#define HAVE_DECL_FABSF 1
+
+/* Define to 1 if you have the declaration of `HW_NCPU', and to 0 if you
+ don't. */
+#define HAVE_DECL_HW_NCPU 0
+
+/* Define to 1 if you have the declaration of
+ `nvmlDeviceGetMaxPcieLinkGeneration', and to 0 if you don't. */
+/* #undef HAVE_DECL_NVMLDEVICEGETMAXPCIELINKGENERATION */
+
+/* Define to 1 if you have the declaration of `PCI_LOOKUP_NO_NUMBERS', and to
+ 0 if you don't. */
+/* #undef HAVE_DECL_PCI_LOOKUP_NO_NUMBERS */
+
+/* Define to 1 if you have the declaration of `pthread_getaffinity_np', and to
+ 0 if you don't. */
+#define HAVE_DECL_PTHREAD_GETAFFINITY_NP 1
+
+/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to
+ 0 if you don't. */
+#define HAVE_DECL_PTHREAD_SETAFFINITY_NP 1
+
+/* Define to 1 if you have the declaration of `strtoull', and to 0 if you
+ don't. */
+#define HAVE_DECL_STRTOULL 1
+
+/* Define to 1 if you have the declaration of `_SC_LARGE_PAGESIZE', and to 0
+ if you don't. */
+#define HAVE_DECL__SC_LARGE_PAGESIZE 0
+
+/* Define to 1 if you have the declaration of `_SC_NPROCESSORS_CONF', and to 0
+ if you don't. */
+#define HAVE_DECL__SC_NPROCESSORS_CONF 1
+
+/* Define to 1 if you have the declaration of `_SC_NPROCESSORS_ONLN', and to 0
+ if you don't. */
+#define HAVE_DECL__SC_NPROCESSORS_ONLN 1
+
+/* Define to 1 if you have the declaration of `_SC_NPROC_CONF', and to 0 if
+ you don't. */
+#define HAVE_DECL__SC_NPROC_CONF 0
+
+/* Define to 1 if you have the declaration of `_SC_NPROC_ONLN', and to 0 if
+ you don't. */
+#define HAVE_DECL__SC_NPROC_ONLN 0
+
+/* Define to 1 if you have the declaration of `_SC_PAGESIZE', and to 0 if you
+ don't. */
+#define HAVE_DECL__SC_PAGESIZE 1
+
+/* Define to 1 if you have the declaration of `_SC_PAGE_SIZE', and to 0 if you
+ don't. */
+#define HAVE_DECL__SC_PAGE_SIZE 1
+
+/* Define to 1 if you have the <dirent.h> header file. */
+#define HAVE_DIRENT_H 1
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you have the `ffs' function. */
+#define HAVE_FFS 1
+
+/* Define to 1 if you have the `ffsl' function. */
+#define HAVE_FFSL 1
+
+/* Define to 1 if you have the `fls' function. */
+/* #undef HAVE_FLS */
+
+/* Define to 1 if you have the `flsl' function. */
+/* #undef HAVE_FLSL */
+
+/* Define to 1 if you have the `getpagesize' function. */
+#define HAVE_GETPAGESIZE 1
+
+/* Define to 1 if the system has the type `GROUP_AFFINITY'. */
+/* #undef HAVE_GROUP_AFFINITY */
+
+/* Define to 1 if the system has the type `GROUP_RELATIONSHIP'. */
+/* #undef HAVE_GROUP_RELATIONSHIP */
+
+/* Define to 1 if you have the `host_info' function. */
+/* #undef HAVE_HOST_INFO */
+
+/* Define to 1 if you have the <infiniband/verbs.h> header file. */
+#define HAVE_INFINIBAND_VERBS_H 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if the system has the type `KAFFINITY'. */
+/* #undef HAVE_KAFFINITY */
+
+/* Define to 1 if you have the <kstat.h> header file. */
+/* #undef HAVE_KSTAT_H */
+
+/* Define to 1 if you have the <langinfo.h> header file. */
+#define HAVE_LANGINFO_H 1
+
+/* Define to 1 if we have -lgdi32 */
+/* #undef HAVE_LIBGDI32 */
+
+/* Define to 1 if we have -libverbs */
+#define HAVE_LIBIBVERBS 1
+
+/* Define to 1 if we have -lkstat */
+/* #undef HAVE_LIBKSTAT */
+
+/* Define to 1 if we have -llgrp */
+/* #undef HAVE_LIBLGRP */
+
+/* Define to 1 if you have the `pci' library (-lpci). */
+/* #undef HAVE_LIBPCI */
+
+/* Define to 1 if you have the <locale.h> header file. */
+#define HAVE_LOCALE_H 1
+
+/* Define to 1 if the system has the type `LOGICAL_PROCESSOR_RELATIONSHIP'. */
+/* #undef HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */
+
+/* Define to 1 if you have the <mach/mach_host.h> header file. */
+/* #undef HAVE_MACH_MACH_HOST_H */
+
+/* Define to 1 if you have the <mach/mach_init.h> header file. */
+/* #undef HAVE_MACH_MACH_INIT_H */
+
+/* Define to 1 if you have the <malloc.h> header file. */
+#define HAVE_MALLOC_H 1
+
+/* Define to 1 if you have the `memalign' function. */
+#define HAVE_MEMALIGN 1
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 if we have -lmyriexpress */
+/* #undef HAVE_MYRIEXPRESS */
+
+/* Define to 1 if you have the <myriexpress.h> header file. */
+/* #undef HAVE_MYRIEXPRESS_H */
+
+/* Define to 1 if you have the `nl_langinfo' function. */
+#define HAVE_NL_LANGINFO 1
+
+/* Define to 1 if you have the <numaif.h> header file. */
+/* #undef HAVE_NUMAIF_H */
+
+/* Define to 1 if the system has the type `NUMA_NODE_RELATIONSHIP'. */
+/* #undef HAVE_NUMA_NODE_RELATIONSHIP */
+
+/* Define to 1 if you have the <NVCtrl/NVCtrl.h> header file. */
+/* #undef HAVE_NVCTRL_NVCTRL_H */
+
+/* Define to 1 if you have the <nvml.h> header file. */
+/* #undef HAVE_NVML_H */
+
+/* Define to 1 if you have the `openat' function. */
+#define HAVE_OPENAT 1
+
+/* Define to 1 if you have the <pci/pci.h> header file. */
+/* #undef HAVE_PCI_PCI_H */
+
+/* Define to 1 if you have the <picl.h> header file. */
+/* #undef HAVE_PICL_H */
+
+/* Define to 1 if you have the `posix_memalign' function. */
+#define HAVE_POSIX_MEMALIGN 1
+
+/* Define to 1 if the system has the type `PROCESSOR_CACHE_TYPE'. */
+/* #undef HAVE_PROCESSOR_CACHE_TYPE */
+
+/* Define to 1 if the system has the type `PROCESSOR_GROUP_INFO'. */
+/* #undef HAVE_PROCESSOR_GROUP_INFO */
+
+/* Define to 1 if the system has the type `PROCESSOR_RELATIONSHIP'. */
+/* #undef HAVE_PROCESSOR_RELATIONSHIP */
+
+/* Define to 1 if the system has the type `PSAPI_WORKING_SET_EX_BLOCK'. */
+/* #undef HAVE_PSAPI_WORKING_SET_EX_BLOCK */
+
+/* Define to 1 if the system has the type `PSAPI_WORKING_SET_EX_INFORMATION'.
+ */
+/* #undef HAVE_PSAPI_WORKING_SET_EX_INFORMATION */
+
+/* Define to 1 if you have the <pthread_np.h> header file. */
+/* #undef HAVE_PTHREAD_NP_H */
+
+/* Define to 1 if the system has the type `pthread_t'. */
+#define HAVE_PTHREAD_T 1
+
+/* Define to 1 if you have the `putwc' function. */
+#define HAVE_PUTWC 1
+
+/* Define to 1 if the system has the type `RelationProcessorPackage'. */
+/* #undef HAVE_RELATIONPROCESSORPACKAGE */
+
+/* Define to 1 if you have the `setlocale' function. */
+#define HAVE_SETLOCALE 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the `strftime' function. */
+#define HAVE_STRFTIME 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the `strncasecmp' function. */
+#define HAVE_STRNCASECMP 1
+
+/* Define to '1' if sysctl is present and usable */
+#define HAVE_SYSCTL 1
+
+/* Define to '1' if sysctlbyname is present and usable */
+/* #undef HAVE_SYSCTLBYNAME */
+
+/* Define to 1 if the system has the type
+ `SYSTEM_LOGICAL_PROCESSOR_INFORMATION'. */
+/* #undef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION */
+
+/* Define to 1 if the system has the type
+ `SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX'. */
+/* #undef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX */
+
+/* Define to 1 if you have the <sys/cpuset.h> header file. */
+/* #undef HAVE_SYS_CPUSET_H */
+
+/* Define to 1 if you have the <sys/lgrp_user.h> header file. */
+/* #undef HAVE_SYS_LGRP_USER_H */
+
+/* Define to 1 if you have the <sys/mman.h> header file. */
+#define HAVE_SYS_MMAN_H 1
+
+/* Define to 1 if you have the <sys/param.h> header file. */
+#define HAVE_SYS_PARAM_H 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/sysctl.h> header file. */
+#define HAVE_SYS_SYSCTL_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <sys/utsname.h> header file. */
+#define HAVE_SYS_UTSNAME_H 1
+
+/* Define to 1 if you have the `uname' function. */
+#define HAVE_UNAME 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the `uselocale' function. */
+#define HAVE_USELOCALE 1
+
+/* Define to 1 if the system has the type `wchar_t'. */
+#define HAVE_WCHAR_T 1
+
+/* Define to 1 if you have the <X11/keysym.h> header file. */
+#define HAVE_X11_KEYSYM_H 1
+
+/* Define to 1 if you have the <X11/Xlib.h> header file. */
+#define HAVE_X11_XLIB_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_X11_XUTIL_H 1
+
+/* Define to 1 if you have the header file. */
+#define HAVE_XLOCALE_H 1
+
+/* Define to 1 on AIX */
+/* #undef HWLOC_AIX_SYS */
+
+/* Define to 1 on BlueGene/Q */
+/* #undef HWLOC_BGQ_SYS */
+
+/* Whether C compiler supports symbol visibility or not */
+#define HWLOC_C_HAVE_VISIBILITY 1
+
+/* Define to 1 on Darwin */
+/* #undef HWLOC_DARWIN_SYS */
+
+/* Whether we are in debugging mode or not */
+/* #undef HWLOC_DEBUG */
+
+/* Define to 1 on *FREEBSD */
+/* #undef HWLOC_FREEBSD_SYS */
+
+/* Whether your compiler has __attribute__ or not */
+#define HWLOC_HAVE_ATTRIBUTE 1
+
+/* Whether your compiler has __attribute__ aligned or not */
+#define HWLOC_HAVE_ATTRIBUTE_ALIGNED 1
+
+/* Whether your compiler has __attribute__ always_inline or not */
+#define HWLOC_HAVE_ATTRIBUTE_ALWAYS_INLINE 1
+
+/* Whether your compiler has __attribute__ cold or not */
+#define HWLOC_HAVE_ATTRIBUTE_COLD 1
+
+/* Whether your compiler has __attribute__ const or not */
+#define HWLOC_HAVE_ATTRIBUTE_CONST 1
+
+/* Whether your compiler has __attribute__ deprecated or not */
+#define HWLOC_HAVE_ATTRIBUTE_DEPRECATED 1
+
+/* Whether your compiler has __attribute__ format or not */
+#define HWLOC_HAVE_ATTRIBUTE_FORMAT 1
+
+/* Whether your compiler has __attribute__ hot or not */
+#define HWLOC_HAVE_ATTRIBUTE_HOT 1
+
+/* Whether your compiler has __attribute__ malloc or not */
+#define HWLOC_HAVE_ATTRIBUTE_MALLOC 1
+
+/* Whether your compiler has __attribute__ may_alias or not */
+#define HWLOC_HAVE_ATTRIBUTE_MAY_ALIAS 1
+
+/* Whether your compiler has __attribute__ nonnull or not */
+#define HWLOC_HAVE_ATTRIBUTE_NONNULL 1
+
+/* Whether your compiler has __attribute__ noreturn or not */
+#define HWLOC_HAVE_ATTRIBUTE_NORETURN 1
+
+/* Whether your compiler has __attribute__ no_instrument_function or not */
+#define HWLOC_HAVE_ATTRIBUTE_NO_INSTRUMENT_FUNCTION 1
+
+/* Whether your compiler has __attribute__ packed or not */
+#define HWLOC_HAVE_ATTRIBUTE_PACKED 1
+
+/* Whether your compiler has __attribute__ pure or not */
+#define HWLOC_HAVE_ATTRIBUTE_PURE 1
+
+/* Whether your compiler has __attribute__ sentinel or not */
+#define HWLOC_HAVE_ATTRIBUTE_SENTINEL 1
+
+/* Whether your compiler has __attribute__ unused or not */
+#define HWLOC_HAVE_ATTRIBUTE_UNUSED 1
+
+/* Whether your compiler has __attribute__ warn unused result or not */
+#define HWLOC_HAVE_ATTRIBUTE_WARN_UNUSED_RESULT 1
+
+/* Whether your compiler has __attribute__ weak alias or not */
+#define HWLOC_HAVE_ATTRIBUTE_WEAK_ALIAS 1
+
+/* Define to 1 if your `ffs' function is known to be broken. */
+/* #undef HWLOC_HAVE_BROKEN_FFS */
+
+/* Define to 1 if you have the `cairo' library. */
+/* #undef HWLOC_HAVE_CAIRO */
+
+/* Define to 1 if you have the `clz' function. */
+/* #undef HWLOC_HAVE_CLZ */
+
+/* Define to 1 if you have the `clzl' function. */
+/* #undef HWLOC_HAVE_CLZL */
+
+/* Define to 1 if you have cpuid */
+#define HWLOC_HAVE_CPUID 1
+
+/* Define to 1 if the CPU_SET macro works */
+#define HWLOC_HAVE_CPU_SET 1
+
+/* Define to 1 if the CPU_SET_S macro works */
+#define HWLOC_HAVE_CPU_SET_S 1
+
+/* Define to 1 if you have the `cudart' SDK. */
+/* #undef HWLOC_HAVE_CUDART */
+
+/* Define to 1 if function `clz' is declared by system headers */
+/* #undef HWLOC_HAVE_DECL_CLZ */
+
+/* Define to 1 if function `clzl' is declared by system headers */
+/* #undef HWLOC_HAVE_DECL_CLZL */
+
+/* Define to 1 if function `ffs' is declared by system headers */
+#define HWLOC_HAVE_DECL_FFS 1
+
+/* Define to 1 if function `ffsl' is declared by system headers */
+#define HWLOC_HAVE_DECL_FFSL 1
+
+/* Define to 1 if function `fls' is declared by system headers */
+/* #undef HWLOC_HAVE_DECL_FLS */
+
+/* Define to 1 if function `flsl' is declared by system headers */
+/* #undef HWLOC_HAVE_DECL_FLSL */
+
+/* Define to 1 if you have the `ffs' function. */
+#define HWLOC_HAVE_FFS 1
+
+/* Define to 1 if you have the `ffsl' function. */
+#define HWLOC_HAVE_FFSL 1
+
+/* Define to 1 if you have the `fls' function. */
+/* #undef HWLOC_HAVE_FLS */
+
+/* Define to 1 if you have the `flsl' function. */
+/* #undef HWLOC_HAVE_FLSL */
+
+/* Define to 1 if you have the GL module components. */
+/* #undef HWLOC_HAVE_GL */
+
+/* Define to 1 if you have the `libpciaccess' library. */
+/* #undef HWLOC_HAVE_LIBPCIACCESS */
+
+/* Define to 1 if you have a library providing the termcap interface */
+#define HWLOC_HAVE_LIBTERMCAP 1
+
+/* Define to 1 if you have the `libxml2' library. */
+#define HWLOC_HAVE_LIBXML2 1
+
+/* Define to 1 if building the Linux PCI component */
+#define HWLOC_HAVE_LINUXPCI 1
+
+/* Define to 1 if mbind is available. */
+/* #undef HWLOC_HAVE_MBIND */
+
+/* Define to 1 if migrate_pages is available. */
+/* #undef HWLOC_HAVE_MIGRATE_PAGES */
+
+/* Define to 1 if you have the `NVML' library. */
+/* #undef HWLOC_HAVE_NVML */
+
+/* Define to 1 if glibc provides the old prototype (without length) of
+ sched_setaffinity() */
+/* #undef HWLOC_HAVE_OLD_SCHED_SETAFFINITY */
+
+/* Define to 1 if you have the `OpenCL' library. */
+/* #undef HWLOC_HAVE_OPENCL */
+
+/* Define to 1 if `libpci' struct pci_dev has a `device_class' field. */
+/* #undef HWLOC_HAVE_PCIDEV_DEVICE_CLASS */
+
+/* Define to 1 if `libpci' struct pci_dev has a `domain' field. */
+/* #undef HWLOC_HAVE_PCIDEV_DOMAIN */
+
+/* Define to 1 if you have the pciutils `libpci' library. */
+/* #undef HWLOC_HAVE_PCIUTILS */
+
+/* Define to 1 if `libpci' has the `pci_find_cap' function. */
+/* #undef HWLOC_HAVE_PCI_FIND_CAP */
+
+/* Define to 1 if the hwloc library should support dynamically-loaded plugins
+ */
+/* #undef HWLOC_HAVE_PLUGINS */
+
+/* `Define to 1 if you have pthread_getthrds_np' */
+/* #undef HWLOC_HAVE_PTHREAD_GETTHRDS_NP */
+
+/* Define to 1 if pthread mutexes are available */
+#define HWLOC_HAVE_PTHREAD_MUTEX 1
+
+/* Define to 1 if glibc provides a prototype of sched_setaffinity() */
+#define HWLOC_HAVE_SCHED_SETAFFINITY 1
+
+/* Define to 1 if set_mempolicy is available. */
+/* #undef HWLOC_HAVE_SET_MEMPOLICY */
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HWLOC_HAVE_STDINT_H 1
+
+/* Define to 1 if you have the `windows.h' header. */
+/* #undef HWLOC_HAVE_WINDOWS_H */
+
+/* Define to 1 if X11 headers including Xutil.h and keysym.h are available. */
+#define HWLOC_HAVE_X11_KEYSYM 1
+
+/* Define to 1 if the _syscall3 macro works */
+/* #undef HWLOC_HAVE__SYSCALL3 */
+
+/* Define to 1 on HP-UX */
+/* #undef HWLOC_HPUX_SYS */
+
+/* Define to 1 on Irix */
+/* #undef HWLOC_IRIX_SYS */
+
+/* Define to 1 on Linux */
+#define HWLOC_LINUX_SYS 1
+
+/* Define to 1 on *NETBSD */
+/* #undef HWLOC_NETBSD_SYS */
+
+/* Define to 1 on OSF */
+/* #undef HWLOC_OSF_SYS */
+
+/* The size of `unsigned int', as computed by sizeof */
+#define HWLOC_SIZEOF_UNSIGNED_INT 4
+
+/* The size of `unsigned long', as computed by sizeof */
+#define HWLOC_SIZEOF_UNSIGNED_LONG 8
+
+/* Define to 1 on Solaris */
+/* #undef HWLOC_SOLARIS_SYS */
+
+/* The hwloc symbol prefix */
+#define HWLOC_SYM_PREFIX hwloc_
+
+/* The hwloc symbol prefix in all caps */
+#define HWLOC_SYM_PREFIX_CAPS HWLOC_
+
+/* Whether we need to re-define all the hwloc public symbols or not */
+#define HWLOC_SYM_TRANSFORM 0
+
+/* Define to 1 on unsupported systems */
+/* #undef HWLOC_UNSUPPORTED_SYS */
+
+/* Define to 1 if ncurses works, preferred over curses */
+#define HWLOC_USE_NCURSES 1
+
+/* Define to 1 on WINDOWS */
+/* #undef HWLOC_WIN_SYS */
+
+/* Define to 1 on x86_32 */
+/* #undef HWLOC_X86_32_ARCH */
+
+/* Define to 1 on x86_64 */
+#define HWLOC_X86_64_ARCH 1
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+ */
+#define LT_OBJDIR ".libs/"
+
+/* Name of package */
+#define PACKAGE "hwloc"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT "http://www.open-mpi.org/projects/hwloc/"
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "hwloc"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "hwloc 1.8.1"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "hwloc"
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL ""
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "1.8.1"
+
+/* The size of `unsigned int', as computed by sizeof. */
+#define SIZEOF_UNSIGNED_INT 4
+
+/* The size of `unsigned long', as computed by sizeof. */
+#define SIZEOF_UNSIGNED_LONG 8
+
+/* The size of `void *', as computed by sizeof. */
+#define SIZEOF_VOID_P 8
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Enable extensions on HP-UX. */
+#ifndef _HPUX_SOURCE
+# define _HPUX_SOURCE 1
+#endif
+
+
+/* Enable extensions on AIX 3, Interix. */
+#ifndef _ALL_SOURCE
+# define _ALL_SOURCE 1
+#endif
+/* Enable GNU extensions on systems that have them. */
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE 1
+#endif
+/* Enable threading extensions on Solaris. */
+#ifndef _POSIX_PTHREAD_SEMANTICS
+# define _POSIX_PTHREAD_SEMANTICS 1
+#endif
+/* Enable extensions on HP NonStop. */
+#ifndef _TANDEM_SOURCE
+# define _TANDEM_SOURCE 1
+#endif
+/* Enable general extensions on Solaris. */
+#ifndef __EXTENSIONS__
+# define __EXTENSIONS__ 1
+#endif
+
+
+/* Version number of package */
+#define VERSION "1.8.1"
+
+/* Define to 1 if the X Window System is missing or not being used. */
+/* #undef X_DISPLAY_MISSING */
+
+/* Are we building for HP-UX? */
+#define _HPUX_SOURCE 1
+
+/* Define to 1 if on MINIX. */
+/* #undef _MINIX */
+
+/* Define to 2 if the system does not provide POSIX.1 features except with
+ this defined. */
+/* #undef _POSIX_1_SOURCE */
+
+/* Define to 1 if you need to in order for `stat' and other things to work. */
+/* #undef _POSIX_SOURCE */
+
+/* Define this to the process ID type */
+#define hwloc_pid_t pid_t
+
+/* Define this to either strncasecmp or strncmp */
+#define hwloc_strncasecmp strncasecmp
+
+/* Define this to the thread ID type */
+#define hwloc_thread_t pthread_t
+
+
+#endif /* HWLOC_CONFIGURE_H */
+
diff --git a/ext/hwloc/include/private/components.h b/ext/hwloc/include/private/components.h
new file mode 100644
index 000000000..b36634535
--- /dev/null
+++ b/ext/hwloc/include/private/components.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2012 Inria. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+
+#ifdef HWLOC_INSIDE_PLUGIN
+/*
+ * these declarations are internal only, they are not available to plugins
+ * (many functions below are internal static symbols).
+ */
+#error This file should not be used in plugins
+#endif
+
+
+#ifndef PRIVATE_COMPONENTS_H
+#define PRIVATE_COMPONENTS_H 1
+
+#include <hwloc/plugins.h>
+
+struct hwloc_topology;
+
+extern int hwloc_disc_component_force_enable(struct hwloc_topology *topology,
+ int envvar_forced, /* 1 if forced through envvar, 0 if forced through API */
+ int type, const char *name,
+ const void *data1, const void *data2, const void *data3);
+extern void hwloc_disc_components_enable_others(struct hwloc_topology *topology);
+
+/* Compute the topology is_thissystem flag based on enabled backends */
+extern void hwloc_backends_is_thissystem(struct hwloc_topology *topology);
+
+/* Disable and destroy all backends used by a topology */
+extern void hwloc_backends_disable_all(struct hwloc_topology *topology);
+
+/* Used by the core to setup/destroy the list of components */
+extern void hwloc_components_init(struct hwloc_topology *topology); /* increases components refcount, should be called exactly once per topology (during init) */
+extern void hwloc_components_destroy_all(struct hwloc_topology *topology); /* decreases components refcount, should be called exactly once per topology (during destroy) */
+
+#endif /* PRIVATE_COMPONENTS_H */
+
diff --git a/ext/hwloc/include/private/cpuid.h b/ext/hwloc/include/private/cpuid.h
new file mode 100644
index 000000000..214ab3827
--- /dev/null
+++ b/ext/hwloc/include/private/cpuid.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright © 2010-2012 Université Bordeaux 1
+ * Copyright © 2010 Cisco Systems, Inc. All rights reserved.
+ * Copyright © 2014 Inria. All rights reserved.
+ *
+ * See COPYING in top-level directory.
+ */
+
+/* Internals for x86's cpuid. */
+
+#ifndef HWLOC_PRIVATE_CPUID_H
+#define HWLOC_PRIVATE_CPUID_H
+
+#ifdef HWLOC_X86_32_ARCH
+/* Runtime test for cpuid support on 32-bit x86.
+ * Technique: try to flip the ID bit (bit 21, mask 0x00200000) of EFLAGS,
+ * twice (set->clear and clear->set); if the bit sticks both ways, the CPU
+ * supports the cpuid instruction (486+; a 386 cannot change this bit).
+ * Returns 1 if cpuid is usable, 0 otherwise. */
+static __hwloc_inline int hwloc_have_cpuid(void)
+{
+ int ret;
+ unsigned tmp, tmp2;
+ asm(
+ "mov $0,%0\n\t" /* Not supported a priori */
+
+ "pushfl \n\t" /* Save flags */
+
+ "pushfl \n\t" \
+ "pop %1 \n\t" /* Get flags */ \
+
+#define TRY_TOGGLE \
+ "xor $0x00200000,%1\n\t" /* Try to toggle ID */ \
+ "mov %1,%2\n\t" /* Save expected value */ \
+ "push %1 \n\t" \
+ "popfl \n\t" /* Try to toggle */ \
+ "pushfl \n\t" \
+ "pop %1 \n\t" \
+ "cmp %1,%2\n\t" /* Compare with expected value */ \
+ "jnz Lhwloc1\n\t" /* Unexpected, failure */ \
+
+ TRY_TOGGLE /* Try to set/clear */
+ TRY_TOGGLE /* Try to clear/set */
+
+ "mov $1,%0\n\t" /* Passed the test! */
+
+ "Lhwloc1: \n\t"
+ "popfl \n\t" /* Restore flags */
+
+ : "=r" (ret), "=&r" (tmp), "=&r" (tmp2));
+ return ret;
+}
+#endif /* HWLOC_X86_32_ARCH */
+#ifdef HWLOC_X86_64_ARCH
+static __hwloc_inline int hwloc_have_cpuid(void) { return 1; } /* cpuid is architecturally guaranteed on x86-64 */
+#endif /* HWLOC_X86_64_ARCH */
+
+/* Execute the cpuid instruction.
+ * On input *eax/*ecx select the leaf/subleaf; on output all four pointed-to
+ * values receive the corresponding registers.  %rbx/%ebx is saved to a
+ * scratch register around cpuid and its result copied out through memory,
+ * because the compiler may have reserved bx (see note below). */
+static __hwloc_inline void hwloc_cpuid(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx)
+{
+ /* Note: gcc might want to use bx or the stack for %1 addressing, so we can't
+ * use them :/ */
+#ifdef HWLOC_X86_64_ARCH
+ hwloc_uint64_t sav_rbx;
+ asm(
+ "mov %%rbx,%2\n\t"
+ "cpuid\n\t"
+ "xchg %2,%%rbx\n\t"
+ "movl %k2,%1\n\t"
+ : "+a" (*eax), "=m" (*ebx), "=&r"(sav_rbx),
+ "+c" (*ecx), "=&d" (*edx));
+#elif defined(HWLOC_X86_32_ARCH)
+ unsigned long sav_ebx;
+ asm(
+ "mov %%ebx,%2\n\t"
+ "cpuid\n\t"
+ "xchg %2,%%ebx\n\t"
+ "movl %k2,%1\n\t"
+ : "+a" (*eax), "=m" (*ebx), "=&r"(sav_ebx),
+ "+c" (*ecx), "=&d" (*edx));
+#else
+#error unknown architecture
+#endif
+}
+
+#endif /* HWLOC_PRIVATE_CPUID_H */
diff --git a/ext/hwloc/include/private/debug.h b/ext/hwloc/include/private/debug.h
new file mode 100644
index 000000000..b327bf2a6
--- /dev/null
+++ b/ext/hwloc/include/private/debug.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2012 Inria. All rights reserved.
+ * Copyright © 2009, 2011 Université Bordeaux 1
+ * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/* The configuration file */
+
+#ifndef HWLOC_DEBUG_H
+#define HWLOC_DEBUG_H
+
+#include <private/autogen/config.h>
+
+#ifdef HWLOC_DEBUG
+#include <stdarg.h>
+#include <stdio.h>
+#endif
+
+/* Emit a printf-style debug message on stderr.
+ * Compiles to an empty function unless HWLOC_DEBUG is defined at build time,
+ * so call sites need no conditional guards. */
+static __hwloc_inline void hwloc_debug(const char *s __hwloc_attribute_unused, ...)
+{
+#ifdef HWLOC_DEBUG
+  va_list args;
+  va_start(args, s);
+  vfprintf(stderr, s, args);
+  va_end(args);
+#endif
+}
+
+/* Debug helpers printing a bitmap (plus up to two extra leading printf
+ * arguments) on stderr: the bitmap is converted to its ASCII form via
+ * hwloc_bitmap_asprintf() and fed to the %s conversion in 'fmt'.
+ * Each expands to a no-op unless HWLOC_DEBUG is defined. */
+#ifdef HWLOC_DEBUG
+#define hwloc_debug_bitmap(fmt, bitmap) do { \
+char *s; \
+hwloc_bitmap_asprintf(&s, bitmap); \
+fprintf(stderr, fmt, s); \
+free(s); \
+} while (0)
+#define hwloc_debug_1arg_bitmap(fmt, arg1, bitmap) do { \
+char *s; \
+hwloc_bitmap_asprintf(&s, bitmap); \
+fprintf(stderr, fmt, arg1, s); \
+free(s); \
+} while (0)
+#define hwloc_debug_2args_bitmap(fmt, arg1, arg2, bitmap) do { \
+char *s; \
+hwloc_bitmap_asprintf(&s, bitmap); \
+fprintf(stderr, fmt, arg1, arg2, s); \
+free(s); \
+} while (0)
+#else
+#define hwloc_debug_bitmap(s, bitmap) do { } while(0)
+#define hwloc_debug_1arg_bitmap(s, arg1, bitmap) do { } while(0)
+#define hwloc_debug_2args_bitmap(s, arg1, arg2, bitmap) do { } while(0)
+#endif
+
+#endif /* HWLOC_DEBUG_H */
diff --git a/ext/hwloc/include/private/misc.h b/ext/hwloc/include/private/misc.h
new file mode 100644
index 000000000..3f4c95c33
--- /dev/null
+++ b/ext/hwloc/include/private/misc.h
@@ -0,0 +1,357 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2010 inria. All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux 1
+ * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/* Misc macros and inlines. */
+
+#ifndef HWLOC_PRIVATE_MISC_H
+#define HWLOC_PRIVATE_MISC_H
+
+#include <private/autogen/config.h>
+#include <hwloc/autogen/config.h>
+
+/* Compile-time assertion */
+#define HWLOC_BUILD_ASSERT(condition) ((void)sizeof(char[1 - 2*!(condition)]))
+
+#define HWLOC_BITS_PER_LONG (HWLOC_SIZEOF_UNSIGNED_LONG * 8)
+#define HWLOC_BITS_PER_INT (HWLOC_SIZEOF_UNSIGNED_INT * 8)
+
+#if (HWLOC_BITS_PER_LONG != 32) && (HWLOC_BITS_PER_LONG != 64)
+#error "unknown size for unsigned long."
+#endif
+
+#if (HWLOC_BITS_PER_INT != 16) && (HWLOC_BITS_PER_INT != 32) && (HWLOC_BITS_PER_INT != 64)
+#error "unknown size for unsigned int."
+#endif
+
+
+/**
+ * ffsl helpers.
+ */
+
+#if defined(HWLOC_HAVE_BROKEN_FFS)
+
+/* System has a broken ffs().
+ * We must check this case before the __GNUC__ or HWLOC_HAVE_FFSL cases.
+ */
+# define HWLOC_NO_FFS
+
+#elif defined(__GNUC__)
+
+# if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))
+ /* Starting from 3.4, gcc has a long variant. */
+# define hwloc_ffsl(x) __builtin_ffsl(x)
+# else
+# define hwloc_ffs(x) __builtin_ffs(x)
+# define HWLOC_NEED_FFSL
+# endif
+
+#elif defined(HWLOC_HAVE_FFSL)
+
+# ifndef HWLOC_HAVE_DECL_FFSL
+extern int ffsl(long) __hwloc_attribute_const;
+# endif
+
+# define hwloc_ffsl(x) ffsl(x)
+
+#elif defined(HWLOC_HAVE_FFS)
+
+# ifndef HWLOC_HAVE_DECL_FFS
+extern int ffs(int) __hwloc_attribute_const;
+# endif
+
+# define hwloc_ffs(x) ffs(x)
+# define HWLOC_NEED_FFSL
+
+#else /* no ffs implementation */
+
+# define HWLOC_NO_FFS
+
+#endif
+
+#ifdef HWLOC_NO_FFS
+
+/* no ffs or it is known to be broken */
+/* Portable fallback for ffsl(): binary search for the least significant set
+ * bit, halving the window width each step.  Returns its 1-based index, or 0
+ * when x is 0. */
+static __hwloc_inline int
+hwloc_ffsl_manual(unsigned long x) __hwloc_attribute_const;
+static __hwloc_inline int
+hwloc_ffsl_manual(unsigned long x)
+{
+  int pos = 1;
+
+  if (!x)
+    return 0;
+
+#if HWLOC_BITS_PER_LONG >= 64
+  if (!(x & 0xfffffffful)) { x >>= 32; pos += 32; }
+#endif
+  if (!(x & 0xfffful)) { x >>= 16; pos += 16; }
+  if (!(x & 0xfful))   { x >>= 8;  pos += 8; }
+  if (!(x & 0xful))    { x >>= 4;  pos += 4; }
+  if (!(x & 0x3ul))    { x >>= 2;  pos += 2; }
+  if (!(x & 0x1ul))    pos += 1;
+
+  return pos;
+}
+/* always define hwloc_ffsl as a macro, to avoid renaming breakage */
+#define hwloc_ffsl hwloc_ffsl_manual
+
+#elif defined(HWLOC_NEED_FFSL)
+
+/* We only have an int ffs(int) implementation, build a long one. */
+
+/* First make it 32 bits if it was only 16. */
+/* Widen hwloc_ffs() to a 32-bit argument when the native int is only 16 bits
+ * wide; otherwise forward directly to hwloc_ffs().  Returns the 1-based index
+ * of the least significant set bit, or 0 if none is set. */
+static __hwloc_inline int
+hwloc_ffs32(unsigned long x) __hwloc_attribute_const;
+static __hwloc_inline int
+hwloc_ffs32(unsigned long x)
+{
+#if HWLOC_BITS_PER_INT == 16
+  int r = hwloc_ffs(x & 0xfffful);
+
+  if (!r) {
+    r = hwloc_ffs(x >> 16);
+    if (r)
+      r += 16;
+  }
+  return r;
+#else
+  return hwloc_ffs(x);
+#endif
+}
+
+/* Then make it 64 bit if longs are. */
+/* Widen hwloc_ffs32() to a full unsigned long when longs are 64 bits wide;
+ * otherwise forward directly to hwloc_ffs32(). */
+static __hwloc_inline int
+hwloc_ffsl_from_ffs32(unsigned long x) __hwloc_attribute_const;
+static __hwloc_inline int
+hwloc_ffsl_from_ffs32(unsigned long x)
+{
+#if HWLOC_BITS_PER_LONG == 64
+  int r = hwloc_ffs32(x & 0xfffffffful);
+
+  if (!r) {
+    r = hwloc_ffs32(x >> 32);
+    if (r)
+      r += 32;
+  }
+  return r;
+#else
+  return hwloc_ffs32(x);
+#endif
+}
+/* always define hwloc_ffsl as a macro, to avoid renaming breakage */
+#define hwloc_ffsl hwloc_ffsl_from_ffs32
+
+#endif
+
+/**
+ * flsl helpers.
+ * fls(x) = 1-based index of the most significant set bit, 0 when x is 0.
+ * Pick the best available implementation, mirroring the ffsl chain above.
+ */
+/* NOTE(review): this previously tested "__GNUC_____" (trailing underscores),
+ * a macro no compiler defines, so the GCC-builtin branch below was dead code
+ * and GCC builds silently fell through to the fallbacks.  Fixed to __GNUC__
+ * to match the ffsl selection chain above. */
+#ifdef __GNUC__
+
+# if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))
+# define hwloc_flsl(x) (x ? 8*sizeof(long) - __builtin_clzl(x) : 0)
+# else
+# define hwloc_fls(x) (x ? 8*sizeof(int) - __builtin_clz(x) : 0)
+# define HWLOC_NEED_FLSL
+# endif
+
+#elif defined(HWLOC_HAVE_FLSL)
+
+# ifndef HWLOC_HAVE_DECL_FLSL
+extern int flsl(long) __hwloc_attribute_const;
+# endif
+
+# define hwloc_flsl(x) flsl(x)
+
+#elif defined(HWLOC_HAVE_CLZL)
+
+# ifndef HWLOC_HAVE_DECL_CLZL
+extern int clzl(long) __hwloc_attribute_const;
+# endif
+
+# define hwloc_flsl(x) (x ? 8*sizeof(long) - clzl(x) : 0)
+
+#elif defined(HWLOC_HAVE_FLS)
+
+# ifndef HWLOC_HAVE_DECL_FLS
+extern int fls(int) __hwloc_attribute_const;
+# endif
+
+# define hwloc_fls(x) fls(x)
+# define HWLOC_NEED_FLSL
+
+#elif defined(HWLOC_HAVE_CLZ)
+
+# ifndef HWLOC_HAVE_DECL_CLZ
+extern int clz(int) __hwloc_attribute_const;
+# endif
+
+# define hwloc_fls(x) (x ? 8*sizeof(int) - clz(x) : 0)
+# define HWLOC_NEED_FLSL
+
+#else /* no fls implementation */
+
+/* Portable fallback: binary search for the most significant set bit. */
+static __hwloc_inline int
+hwloc_flsl_manual(unsigned long x) __hwloc_attribute_const;
+static __hwloc_inline int
+hwloc_flsl_manual(unsigned long x)
+{
+ int i = 0;
+
+ if (!x)
+ return 0;
+
+ i = 1;
+#if HWLOC_BITS_PER_LONG >= 64
+ if ((x & 0xffffffff00000000ul)) {
+ x >>= 32;
+ i += 32;
+ }
+#endif
+ if ((x & 0xffff0000u)) {
+ x >>= 16;
+ i += 16;
+ }
+ if ((x & 0xff00)) {
+ x >>= 8;
+ i += 8;
+ }
+ if ((x & 0xf0)) {
+ x >>= 4;
+ i += 4;
+ }
+ if ((x & 0xc)) {
+ x >>= 2;
+ i += 2;
+ }
+ if ((x & 0x2)) {
+ x >>= 1;
+ i += 1;
+ }
+
+ return i;
+}
+/* always define hwloc_flsl as a macro, to avoid renaming breakage */
+#define hwloc_flsl hwloc_flsl_manual
+
+#endif
+
+#ifdef HWLOC_NEED_FLSL
+
+/* We only have an int fls(int) implementation, build a long one. */
+
+/* First make it 32 bits if it was only 16. */
+/* Widen hwloc_fls() to a 32-bit argument when the native int is only 16 bits
+ * wide; otherwise forward directly to hwloc_fls().  Returns the 1-based index
+ * of the most significant set bit, or 0 if none is set. */
+static __hwloc_inline int
+hwloc_fls32(unsigned long x) __hwloc_attribute_const;
+static __hwloc_inline int
+hwloc_fls32(unsigned long x)
+{
+#if HWLOC_BITS_PER_INT == 16
+  int r = hwloc_fls(x >> 16);
+
+  if (r)
+    return r + 16;
+  /* high half empty: the answer (possibly 0) comes from the low half */
+  return hwloc_fls(x & 0xfffful);
+#else
+  return hwloc_fls(x);
+#endif
+}
+
+/* Then make it 64 bit if longs are. */
+/* Widen hwloc_fls32() to a full unsigned long when longs are 64 bits wide;
+ * otherwise forward directly to hwloc_fls32(). */
+static __hwloc_inline int
+hwloc_flsl_from_fls32(unsigned long x) __hwloc_attribute_const;
+static __hwloc_inline int
+hwloc_flsl_from_fls32(unsigned long x)
+{
+#if HWLOC_BITS_PER_LONG == 64
+  int r = hwloc_fls32(x >> 32);
+
+  if (r)
+    return r + 32;
+  /* high word empty: the answer (possibly 0) comes from the low word */
+  return hwloc_fls32(x & 0xfffffffful);
+#else
+  return hwloc_fls32(x);
+#endif
+}
+/* always define hwloc_flsl as a macro, to avoid renaming breakage */
+#define hwloc_flsl hwloc_flsl_from_fls32
+
+#endif
+
+/* Population count: number of set bits in w.
+ * Uses the GCC builtin when available, otherwise a branch-free SWAR
+ * reduction (pairwise sums of ever-wider bit groups). */
+static __hwloc_inline int
+hwloc_weight_long(unsigned long w) __hwloc_attribute_const;
+static __hwloc_inline int
+hwloc_weight_long(unsigned long w)
+{
+#if HWLOC_BITS_PER_LONG == 32
+#if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__) >= 4)
+ return __builtin_popcount(w);
+#else
+ unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
+ res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
+ res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
+ res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
+ return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
+#endif
+#else /* HWLOC_BITS_PER_LONG == 64 */
+#if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__) >= 4)
+ return __builtin_popcountll(w);
+#else
+ unsigned long res;
+ res = (w & 0x5555555555555555ul) + ((w >> 1) & 0x5555555555555555ul);
+ res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul);
+ res = (res & 0x0F0F0F0F0F0F0F0Ful) + ((res >> 4) & 0x0F0F0F0F0F0F0F0Ful);
+ res = (res & 0x00FF00FF00FF00FFul) + ((res >> 8) & 0x00FF00FF00FF00FFul);
+ res = (res & 0x0000FFFF0000FFFFul) + ((res >> 16) & 0x0000FFFF0000FFFFul);
+ return (res & 0x00000000FFFFFFFFul) + ((res >> 32) & 0x00000000FFFFFFFFul);
+#endif
+#endif /* HWLOC_BITS_PER_LONG == 64 */
+}
+
+#if !HAVE_DECL_STRTOULL
+unsigned long long int strtoull(const char *nptr, char **endptr, int base);
+#endif
+
+#endif /* HWLOC_PRIVATE_MISC_H */
diff --git a/ext/hwloc/include/private/private.h b/ext/hwloc/include/private/private.h
new file mode 100644
index 000000000..5e684b0d6
--- /dev/null
+++ b/ext/hwloc/include/private/private.h
@@ -0,0 +1,300 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2014 Inria. All rights reserved.
+ * Copyright © 2009-2012 Université Bordeaux 1
+ * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
+ *
+ * See COPYING in top-level directory.
+ */
+
+/* Internal types and helpers. */
+
+
+#ifdef HWLOC_INSIDE_PLUGIN
+/*
+ * these declarations are internal only, they are not available to plugins
+ * (many functions below are internal static symbols).
+ */
+#error This file should not be used in plugins
+#endif
+
+
+#ifndef HWLOC_PRIVATE_H
+#define HWLOC_PRIVATE_H
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <hwloc/bitmap.h>
+#include <private/components.h>
+#include <private/debug.h>
+#include <sys/types.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+#ifdef HAVE_SYS_UTSNAME_H
+#include <sys/utsname.h>
+#endif
+#include <string.h>
+
+/* Per-object-type ignore policy applied while building the topology
+ * (presumably set through the public hwloc_topology_ignore_* calls --
+ * confirm against topology.c). */
+enum hwloc_ignore_type_e {
+  HWLOC_IGNORE_TYPE_NEVER = 0, /* keep objects of this type */
+  HWLOC_IGNORE_TYPE_KEEP_STRUCTURE, /* drop them unless that would lose hierarchy structure */
+  HWLOC_IGNORE_TYPE_ALWAYS /* always drop objects of this type */
+};
+
+#define HWLOC_DEPTH_MAX 128
+
+/* Internal state of one topology: the object tree exposed as horizontal
+ * levels, special I/O levels, OS binding hooks, distance matrices, and the
+ * list of enabled discovery backends. */
+struct hwloc_topology {
+  unsigned nb_levels; /* Number of horizontal levels */
+  unsigned next_group_depth; /* Depth of the next Group object that we may create */
+  unsigned level_nbobjects[HWLOC_DEPTH_MAX]; /* Number of objects on each horizontal level */
+  struct hwloc_obj **levels[HWLOC_DEPTH_MAX]; /* Direct access to levels, levels[l = 0 .. nblevels-1][0..level_nbobjects[l]] */
+  unsigned long flags; /* NOTE(review): presumably HWLOC_TOPOLOGY_FLAG_* bits set before load -- confirm */
+  int type_depth[HWLOC_OBJ_TYPE_MAX]; /* per-type depth lookup, one slot per object type */
+  enum hwloc_ignore_type_e ignored_types[HWLOC_OBJ_TYPE_MAX]; /* per-type ignore policy (see enum above) */
+  int is_thissystem; /* whether the topology describes the running system (see hwloc_backends_is_thissystem) */
+  int is_loaded; /* nonzero once the topology has been loaded (presumably set at end of load) */
+  hwloc_pid_t pid; /* Process ID the topology is view from, 0 for self */
+
+  /* special levels for I/O objects: bridges, PCI devices, OS devices */
+  unsigned bridge_nbobjects;
+  struct hwloc_obj **bridge_level;
+  struct hwloc_obj *first_bridge, *last_bridge;
+  unsigned pcidev_nbobjects;
+  struct hwloc_obj **pcidev_level;
+  struct hwloc_obj *first_pcidev, *last_pcidev;
+  unsigned osdev_nbobjects;
+  struct hwloc_obj **osdev_level;
+  struct hwloc_obj *first_osdev, *last_osdev;
+
+  /* OS-specific CPU/memory binding entry points, filled by hwloc_set_*_hooks */
+  struct hwloc_binding_hooks {
+    int (*set_thisproc_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags);
+    int (*get_thisproc_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
+    int (*set_thisthread_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags);
+    int (*get_thisthread_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
+    int (*set_proc_cpubind)(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_cpuset_t set, int flags);
+    int (*get_proc_cpubind)(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags);
+#ifdef hwloc_thread_t
+    int (*set_thread_cpubind)(hwloc_topology_t topology, hwloc_thread_t tid, hwloc_const_cpuset_t set, int flags);
+    int (*get_thread_cpubind)(hwloc_topology_t topology, hwloc_thread_t tid, hwloc_cpuset_t set, int flags);
+#endif
+
+    int (*get_thisproc_last_cpu_location)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
+    int (*get_thisthread_last_cpu_location)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
+    int (*get_proc_last_cpu_location)(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, int flags);
+
+    int (*set_thisproc_membind)(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);
+    int (*get_thisproc_membind)(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags);
+    int (*set_thisthread_membind)(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);
+    int (*get_thisthread_membind)(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags);
+    int (*set_proc_membind)(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);
+    int (*get_proc_membind)(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags);
+    int (*set_area_membind)(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);
+    int (*get_area_membind)(hwloc_topology_t topology, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags);
+    /* This has to return the same kind of pointer as alloc_membind, so that free_membind can be used on it */
+    void *(*alloc)(hwloc_topology_t topology, size_t len);
+    /* alloc_membind has to always succeed if !(flags & HWLOC_MEMBIND_STRICT).
+     * see hwloc_alloc_or_fail which is convenient for that. */
+    void *(*alloc_membind)(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags);
+    int (*free_membind)(hwloc_topology_t topology, void *addr, size_t len);
+  } binding_hooks;
+
+  struct hwloc_topology_support support;
+
+  /* user callbacks for serializing/deserializing per-object userdata */
+  void (*userdata_export_cb)(void *reserved, struct hwloc_topology *topology, struct hwloc_obj *obj);
+  void (*userdata_import_cb)(struct hwloc_topology *topology, struct hwloc_obj *obj, const char *name, const void *buffer, size_t length);
+
+  /* linked list of OS-index-ordered distance matrices gathered during discovery */
+  struct hwloc_os_distances_s {
+    hwloc_obj_type_t type;
+    int nbobjs;
+    unsigned *indexes; /* array of OS indexes before we can convert them into objs. always available.
+                        */
+    struct hwloc_obj **objs; /* array of objects, in the same order as above.
+                              * either given (by a backend) together with the indexes array above.
+                              * or build from the above indexes array when not given (by the user).
+                              */
+    float *distances; /* distance matrices, ordered according to the above indexes/objs array.
+                       * distance from i to j is stored in slot i*nbnodes+j.
+                       * will be copied into the main logical-index-ordered distance at the end of the discovery.
+                       */
+    int forced; /* set if the user forced a matrix to ignore the OS one */
+
+    struct hwloc_os_distances_s *prev, *next;
+  } *first_osdist, *last_osdist;
+
+  /* list of enabled backends. */
+  struct hwloc_backend * backends;
+};
+
+extern void hwloc_alloc_obj_cpusets(hwloc_obj_t obj);
+extern void hwloc_setup_pu_level(struct hwloc_topology *topology, unsigned nb_pus);
+extern int hwloc_get_sysctlbyname(const char *name, int64_t *n);
+extern int hwloc_get_sysctl(int name[], unsigned namelen, int *n);
+extern unsigned hwloc_fallback_nbprocessors(struct hwloc_topology *topology);
+extern void hwloc_connect_children(hwloc_obj_t obj);
+extern int hwloc_connect_levels(hwloc_topology_t topology);
+
+extern void hwloc_topology_setup_defaults(struct hwloc_topology *topology);
+extern void hwloc_topology_clear(struct hwloc_topology *topology);
+
+/* set native OS binding hooks */
+extern void hwloc_set_native_binding_hooks(struct hwloc_binding_hooks *hooks, struct hwloc_topology_support *support);
+/* set either native OS binding hooks (if thissystem), or dummy ones */
+extern void hwloc_set_binding_hooks(struct hwloc_topology *topology);
+
+#if defined(HWLOC_LINUX_SYS)
+extern void hwloc_set_linuxfs_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support);
+#endif /* HWLOC_LINUX_SYS */
+
+#if defined(HWLOC_BGQ_SYS)
+extern void hwloc_set_bgq_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support);
+#endif /* HWLOC_BGQ_SYS */
+
+#ifdef HWLOC_SOLARIS_SYS
+extern void hwloc_set_solaris_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support);
+#endif /* HWLOC_SOLARIS_SYS */
+
+#ifdef HWLOC_AIX_SYS
+extern void hwloc_set_aix_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support);
+#endif /* HWLOC_AIX_SYS */
+
+#ifdef HWLOC_OSF_SYS
+extern void hwloc_set_osf_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support);
+#endif /* HWLOC_OSF_SYS */
+
+#ifdef HWLOC_WIN_SYS
+extern void hwloc_set_windows_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support);
+#endif /* HWLOC_WIN_SYS */
+
+#ifdef HWLOC_DARWIN_SYS
+extern void hwloc_set_darwin_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support);
+#endif /* HWLOC_DARWIN_SYS */
+
+#ifdef HWLOC_FREEBSD_SYS
+extern void hwloc_set_freebsd_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support);
+#endif /* HWLOC_FREEBSD_SYS */
+
+#ifdef HWLOC_NETBSD_SYS
+extern void hwloc_set_netbsd_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support);
+#endif /* HWLOC_NETBSD_SYS */
+
+#ifdef HWLOC_HPUX_SYS
+extern void hwloc_set_hpux_hooks(struct hwloc_binding_hooks *binding_hooks, struct hwloc_topology_support *support);
+#endif /* HWLOC_HPUX_SYS */
+
+/* Insert uname-specific names/values in the object infos array */
+extern void hwloc_add_uname_info(struct hwloc_topology *topology);
+
+/* Free obj and its attributes assuming it doesn't have any children/parent anymore */
+extern void hwloc_free_unlinked_object(hwloc_obj_t obj);
+
+/* Duplicate src and its children under newparent in newtopology */
+extern void hwloc__duplicate_objects(struct hwloc_topology *newtopology, struct hwloc_obj *newparent, struct hwloc_obj *src);
+
+/* This can be used for the alloc field to get allocated data that can be freed by free() */
+void *hwloc_alloc_heap(hwloc_topology_t topology, size_t len);
+
+/* This can be used for the alloc field to get allocated data that can be freed by munmap() */
+void *hwloc_alloc_mmap(hwloc_topology_t topology, size_t len);
+
+/* This can be used for the free_membind field to free data using free() */
+int hwloc_free_heap(hwloc_topology_t topology, void *addr, size_t len);
+
+/* This can be used for the free_membind field to free data using munmap() */
+int hwloc_free_mmap(hwloc_topology_t topology, void *addr, size_t len);
+
+/* Allocates unbound memory or fail, depending on whether STRICT is requested
+ * or not */
+static __hwloc_inline void *
+hwloc_alloc_or_fail(hwloc_topology_t topology, size_t len, int flags)
+{
+  /* a STRICT caller would rather fail than receive unbound memory */
+  if (flags & HWLOC_MEMBIND_STRICT)
+    return NULL;
+  /* best-effort: hand back ordinary (unbound) memory */
+  return hwloc_alloc(topology, len);
+}
+
+extern void hwloc_distances_init(struct hwloc_topology *topology);
+extern void hwloc_distances_destroy(struct hwloc_topology *topology);
+extern void hwloc_distances_set(struct hwloc_topology *topology, hwloc_obj_type_t type, unsigned nbobjs, unsigned *indexes, hwloc_obj_t *objs, float *distances, int force);
+extern void hwloc_distances_set_from_env(struct hwloc_topology *topology);
+extern void hwloc_distances_restrict_os(struct hwloc_topology *topology);
+extern void hwloc_distances_restrict(struct hwloc_topology *topology, unsigned long flags);
+extern void hwloc_distances_finalize_os(struct hwloc_topology *topology);
+extern void hwloc_distances_finalize_logical(struct hwloc_topology *topology);
+extern void hwloc_clear_object_distances(struct hwloc_obj *obj);
+extern void hwloc_clear_object_distances_one(struct hwloc_distances_s *distances);
+extern void hwloc_group_by_distances(struct hwloc_topology *topology);
+
+#ifdef HAVE_USELOCALE
+#include "locale.h"
+#ifdef HAVE_XLOCALE_H
+#include "xlocale.h"
+#endif
+#define hwloc_localeswitch_declare locale_t __old_locale = (locale_t)0, __new_locale
+#define hwloc_localeswitch_init() do { \
+ __new_locale = newlocale(LC_ALL_MASK, "C", (locale_t)0); \
+ if (__new_locale != (locale_t)0) \
+ __old_locale = uselocale(__new_locale); \
+} while (0)
+#define hwloc_localeswitch_fini() do { \
+ if (__new_locale != (locale_t)0) { \
+ uselocale(__old_locale); \
+ freelocale(__new_locale); \
+ } \
+} while(0)
+#else /* HAVE_USELOCALE */
+#define hwloc_localeswitch_declare int __dummy_nolocale __hwloc_attribute_unused
+#define hwloc_localeswitch_init()
+#define hwloc_localeswitch_fini()
+#endif /* HAVE_USELOCALE */
+
+#if !HAVE_DECL_FABSF
+#define fabsf(f) fabs((double)(f))
+#endif
+
+#if HAVE_DECL__SC_PAGE_SIZE
+#define hwloc_getpagesize() sysconf(_SC_PAGE_SIZE)
+#elif HAVE_DECL__SC_PAGESIZE
+#define hwloc_getpagesize() sysconf(_SC_PAGESIZE)
+#elif defined HAVE_GETPAGESIZE
+#define hwloc_getpagesize() getpagesize()
+#else
+#undef hwloc_getpagesize
+#endif
+
+/* encode src buffer into target buffer.
+ * targsize must be at least 4*((srclength+2)/3)+1.
+ * target will be 0-terminated.
+ */
+extern int hwloc_encode_to_base64(const char *src, size_t srclength, char *target, size_t targsize);
+/* decode src buffer into target buffer.
+ * src is 0-terminated.
+ * targsize must be at least srclength*3/4+1 (srclength not including \0)
+ * but only srclength*3/4 characters will be meaningful
+ * (the next one may be partially written during decoding, but it should be ignored).
+ */
+extern int hwloc_decode_from_base64(char const *src, char *target, size_t targsize);
+
+/* Check whether needle matches the beginning of haystack, at least n, and up
+ * to a colon or \0 */
+extern int hwloc_namecoloncmp(const char *haystack, const char *needle, size_t n);
+
+#ifdef HWLOC_HAVE_ATTRIBUTE_FORMAT
+# if HWLOC_HAVE_ATTRIBUTE_FORMAT
+# define __hwloc_attribute_format(type, str, arg) __attribute__((__format__(type, str, arg)))
+# else
+# define __hwloc_attribute_format(type, str, arg)
+# endif
+#else
+# define __hwloc_attribute_format(type, str, arg)
+#endif
+
+/* On some systems, snprintf returns the size of written data, not the actually
+ * required size. hwloc_snprintf always report the actually required size. */
+extern int hwloc_snprintf(char *str, size_t size, const char *format, ...) __hwloc_attribute_format(printf, 3, 4);
+
+extern void hwloc_obj_add_info_nodup(hwloc_obj_t obj, const char *name, const char *value, int nodup);
+
+#endif /* HWLOC_PRIVATE_H */
diff --git a/ext/hwloc/include/private/solaris-chiptype.h b/ext/hwloc/include/private/solaris-chiptype.h
new file mode 100644
index 000000000..b84555b3f
--- /dev/null
+++ b/ext/hwloc/include/private/solaris-chiptype.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
+ *
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#ifdef HWLOC_INSIDE_PLUGIN
+/*
+ * these declarations are internal only, they are not available to plugins
+ * (functions below are internal static symbols).
+ */
+#error This file should not be used in plugins
+#endif
+
+
+#ifndef HWLOC_PRIVATE_SOLARIS_CHIPTYPE_H
+#define HWLOC_PRIVATE_SOLARIS_CHIPTYPE_H
+
+/* SPARC Chip Modes. */
+#define MODE_UNKNOWN 0
+#define MODE_SPITFIRE 1
+#define MODE_BLACKBIRD 2
+#define MODE_CHEETAH 3
+#define MODE_SPARC64_VI 4
+#define MODE_T1 5
+#define MODE_T2 6
+#define MODE_SPARC64_VII 7
+#define MODE_ROCK 8
+
+/* SPARC Chip Implementations. */
+#define IMPL_SPARC64_VI 0x6
+#define IMPL_SPARC64_VII 0x7
+#define IMPL_SPITFIRE 0x10
+#define IMPL_BLACKBIRD 0x11
+#define IMPL_SABRE 0x12
+#define IMPL_HUMMINGBIRD 0x13
+#define IMPL_CHEETAH 0x14
+#define IMPL_CHEETAHPLUS 0x15
+#define IMPL_JALAPENO 0x16
+#define IMPL_JAGUAR 0x18
+#define IMPL_PANTHER 0x19
+#define IMPL_NIAGARA 0x23
+#define IMPL_NIAGARA_2 0x24
+#define IMPL_ROCK 0x25
+
+/* Default Mfg, Cache, Speed settings */
+#define TI_MANUFACTURER 0x17
+#define TWO_MEG_CACHE 2097152
+#define SPITFIRE_SPEED 142943750
+
+char* hwloc_solaris_get_chip_type(void);
+char* hwloc_solaris_get_chip_model(void);
+
+#endif /* HWLOC_PRIVATE_SOLARIS_CHIPTYPE_H */
diff --git a/ext/hwloc/include/private/xml.h b/ext/hwloc/include/private/xml.h
new file mode 100644
index 000000000..fa59050f1
--- /dev/null
+++ b/ext/hwloc/include/private/xml.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright © 2009-2013 Inria. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#ifndef PRIVATE_XML_H
+#define PRIVATE_XML_H 1
+
+#include <hwloc.h>
+
+#include <sys/types.h>
+
+HWLOC_DECLSPEC int hwloc__xml_verbose(void);
+
+typedef struct hwloc__xml_import_state_s {
+ struct hwloc__xml_import_state_s *parent;
+
+ int (*next_attr)(struct hwloc__xml_import_state_s * state, char **namep, char **valuep);
+ int (*find_child)(struct hwloc__xml_import_state_s * state, struct hwloc__xml_import_state_s * childstate, char **tagp);
+ int (*close_tag)(struct hwloc__xml_import_state_s * state); /* look for an explicit closing tag */
+ void (*close_child)(struct hwloc__xml_import_state_s * state);
+ int (*get_content)(struct hwloc__xml_import_state_s * state, char **beginp, size_t expected_length);
+ void (*close_content)(struct hwloc__xml_import_state_s * state);
+
+ /* opaque data used to store backend-specific data.
+ * statically allocated to allow stack-allocation by the common code without knowing actual backend needs.
+ */
+ char data[32];
+} * hwloc__xml_import_state_t;
+
+HWLOC_DECLSPEC int hwloc__xml_import_diff(hwloc__xml_import_state_t state, hwloc_topology_diff_t *firstdiffp);
+
+struct hwloc_xml_backend_data_s {
+ /* xml backend parameters */
+ int (*look_init)(struct hwloc_xml_backend_data_s *bdata, struct hwloc__xml_import_state_s *state);
+ void (*look_failed)(struct hwloc_xml_backend_data_s *bdata);
+ void (*backend_exit)(struct hwloc_xml_backend_data_s *bdata);
+ void *data; /* libxml2 doc, or nolibxml buffer */
+ struct hwloc_xml_imported_distances_s {
+ hwloc_obj_t root;
+ struct hwloc_distances_s distances;
+ struct hwloc_xml_imported_distances_s *prev, *next;
+ } *first_distances, *last_distances;
+};
+
+typedef struct hwloc__xml_export_state_s {
+ struct hwloc__xml_export_state_s *parent;
+
+ void (*new_child)(struct hwloc__xml_export_state_s *parentstate, struct hwloc__xml_export_state_s *state, const char *name);
+ void (*new_prop)(struct hwloc__xml_export_state_s *state, const char *name, const char *value);
+ void (*add_content)(struct hwloc__xml_export_state_s *state, const char *buffer, size_t length);
+ void (*end_object)(struct hwloc__xml_export_state_s *state, const char *name);
+
+ /* opaque data used to store backend-specific data.
+ * statically allocated to allow stack-allocation by the common code without knowing actual backend needs.
+ */
+ char data[40];
+} * hwloc__xml_export_state_t;
+
+HWLOC_DECLSPEC void hwloc__xml_export_object (hwloc__xml_export_state_t state, struct hwloc_topology *topology, struct hwloc_obj *obj);
+
+HWLOC_DECLSPEC void hwloc__xml_export_diff(hwloc__xml_export_state_t parentstate, hwloc_topology_diff_t diff);
+
+/******************
+ * XML components *
+ ******************/
+
+struct hwloc_xml_callbacks {
+ int (*backend_init)(struct hwloc_xml_backend_data_s *bdata, const char *xmlpath, const char *xmlbuffer, int xmlbuflen);
+ int (*export_file)(struct hwloc_topology *topology, const char *filename);
+ int (*export_buffer)(struct hwloc_topology *topology, char **xmlbuffer, int *buflen);
+ void (*free_buffer)(void *xmlbuffer);
+ int (*import_diff)(const char *xmlpath, const char *xmlbuffer, int xmlbuflen, hwloc_topology_diff_t *diff, char **refnamep);
+ int (*export_diff_file)(union hwloc_topology_diff_u *diff, const char *refname, const char *filename);
+ int (*export_diff_buffer)(union hwloc_topology_diff_u *diff, const char *refname, char **xmlbuffer, int *buflen);
+};
+
+struct hwloc_xml_component {
+ struct hwloc_xml_callbacks *nolibxml_callbacks;
+ struct hwloc_xml_callbacks *libxml_callbacks;
+};
+
+HWLOC_DECLSPEC void hwloc_xml_callbacks_register(struct hwloc_xml_component *component);
+HWLOC_DECLSPEC void hwloc_xml_callbacks_reset(void);
+
+#endif /* PRIVATE_XML_H */
diff --git a/ext/hwloc/include/static-components.h b/ext/hwloc/include/static-components.h
new file mode 100644
index 000000000..6688fcd3b
--- /dev/null
+++ b/ext/hwloc/include/static-components.h
@@ -0,0 +1,21 @@
+HWLOC_DECLSPEC extern const struct hwloc_component hwloc_noos_component;
+//HWLOC_DECLSPEC extern const struct hwloc_component hwloc_xml_component;
+HWLOC_DECLSPEC extern const struct hwloc_component hwloc_synthetic_component;
+HWLOC_DECLSPEC extern const struct hwloc_component hwloc_custom_component;
+//HWLOC_DECLSPEC extern const struct hwloc_component hwloc_xml_nolibxml_component;
+HWLOC_DECLSPEC extern const struct hwloc_component hwloc_linux_component;
+HWLOC_DECLSPEC extern const struct hwloc_component hwloc_linuxpci_component;
+//HWLOC_DECLSPEC extern const struct hwloc_component hwloc_xml_libxml_component;
+HWLOC_DECLSPEC extern const struct hwloc_component hwloc_x86_component;
+static const struct hwloc_component * hwloc_static_components[] = {
+ &hwloc_noos_component,
+ //&hwloc_xml_component,
+ &hwloc_synthetic_component,
+ &hwloc_custom_component,
+ //&hwloc_xml_nolibxml_component,
+ &hwloc_linux_component,
+ &hwloc_linuxpci_component,
+ //&hwloc_xml_libxml_component,
+ &hwloc_x86_component,
+ NULL
+};
diff --git a/ext/hwloc/src/base64.c b/ext/hwloc/src/base64.c
new file mode 100644
index 000000000..89cd00315
--- /dev/null
+++ b/ext/hwloc/src/base64.c
@@ -0,0 +1,306 @@
+/*
+ * Copyright © 2012 Inria. All rights reserved.
+ * See COPYING in top-level directory.
+ *
+ * Modifications after import:
+ * - removed all #if
+ * - updated prototypes
+ * - updated #include
+ */
+
+/* $OpenBSD: base64.c,v 1.5 2006/10/21 09:55:03 otto Exp $ */
+
+/*
+ * Copyright (c) 1996 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
+ * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
+ * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+/*
+ * Portions Copyright (c) 1995 by International Business Machines, Inc.
+ *
+ * International Business Machines, Inc. (hereinafter called IBM) grants
+ * permission under its copyrights to use, copy, modify, and distribute this
+ * Software with or without fee, provided that the above copyright notice and
+ * all paragraphs of this notice appear in all copies, and that the name of IBM
+ * not be used in connection with the marketing of any product incorporating
+ * the Software or modifications thereof, without specific, written prior
+ * permission.
+ *
+ * To the extent it has a right to do so, IBM grants an immunity from suit
+ * under its patents, if any, for the use, sale or manufacture of products to
+ * the extent that such products are used for performing Domain Name System
+ * dynamic updates in TCP/IP networks by means of the Software. No immunity is
+ * granted for any product per se or for any other function of any product.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE. IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL,
+ * DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN
+ * IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES.
+ */
+
+/* OPENBSD ORIGINAL: lib/libc/net/base64.c */
+
+static const char Base64[] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+static const char Pad64 = '=';
+
+/* (From RFC1521 and draft-ietf-dnssec-secext-03.txt)
+ The following encoding technique is taken from RFC 1521 by Borenstein
+ and Freed. It is reproduced here in a slightly edited form for
+ convenience.
+
+ A 65-character subset of US-ASCII is used, enabling 6 bits to be
+ represented per printable character. (The extra 65th character, "=",
+ is used to signify a special processing function.)
+
+ The encoding process represents 24-bit groups of input bits as output
+ strings of 4 encoded characters. Proceeding from left to right, a
+ 24-bit input group is formed by concatenating 3 8-bit input groups.
+ These 24 bits are then treated as 4 concatenated 6-bit groups, each
+ of which is translated into a single digit in the base64 alphabet.
+
+ Each 6-bit group is used as an index into an array of 64 printable
+ characters. The character referenced by the index is placed in the
+ output string.
+
+ Table 1: The Base64 Alphabet
+
+ Value Encoding Value Encoding Value Encoding Value Encoding
+ 0 A 17 R 34 i 51 z
+ 1 B 18 S 35 j 52 0
+ 2 C 19 T 36 k 53 1
+ 3 D 20 U 37 l 54 2
+ 4 E 21 V 38 m 55 3
+ 5 F 22 W 39 n 56 4
+ 6 G 23 X 40 o 57 5
+ 7 H 24 Y 41 p 58 6
+ 8 I 25 Z 42 q 59 7
+ 9 J 26 a 43 r 60 8
+ 10 K 27 b 44 s 61 9
+ 11 L 28 c 45 t 62 +
+ 12 M 29 d 46 u 63 /
+ 13 N 30 e 47 v
+ 14 O 31 f 48 w (pad) =
+ 15 P 32 g 49 x
+ 16 Q 33 h 50 y
+
+ Special processing is performed if fewer than 24 bits are available
+ at the end of the data being encoded. A full encoding quantum is
+ always completed at the end of a quantity. When fewer than 24 input
+ bits are available in an input group, zero bits are added (on the
+ right) to form an integral number of 6-bit groups. Padding at the
+ end of the data is performed using the '=' character.
+
+ Since all base64 input is an integral number of octets, only the
+ -------------------------------------------------
+ following cases can arise:
+
+ (1) the final quantum of encoding input is an integral
+ multiple of 24 bits; here, the final unit of encoded
+ output will be an integral multiple of 4 characters
+ with no "=" padding,
+ (2) the final quantum of encoding input is exactly 8 bits;
+ here, the final unit of encoded output will be two
+ characters followed by two "=" padding characters, or
+ (3) the final quantum of encoding input is exactly 16 bits;
+ here, the final unit of encoded output will be three
+ characters followed by one "=" padding character.
+ */
+
+#include <private/autogen/config.h>
+#include <private/private.h>
+#include <string.h>
+
+#include <ctype.h>
+
+/* Encode srclength raw bytes from src as base64 text in target.
+ * Writes 4 output characters per (partial) 3-byte input group plus a
+ * terminating '\0'.  Returns the number of characters written (excluding
+ * the '\0'), or -1 if targsize is too small. */
+int
+hwloc_encode_to_base64(const char *src, size_t srclength, char *target, size_t targsize)
+{
+ size_t datalength = 0;
+ unsigned char input[3];
+ unsigned char output[4];
+ unsigned int i;
+
+ /* full 3-byte groups -> 4 base64 digits each */
+ while (2 < srclength) {
+ input[0] = *src++;
+ input[1] = *src++;
+ input[2] = *src++;
+ srclength -= 3;
+
+ output[0] = input[0] >> 2;
+ output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
+ output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
+ output[3] = input[2] & 0x3f;
+
+ if (datalength + 4 > targsize)
+ return (-1);
+ target[datalength++] = Base64[output[0]];
+ target[datalength++] = Base64[output[1]];
+ target[datalength++] = Base64[output[2]];
+ target[datalength++] = Base64[output[3]];
+ }
+
+ /* Now we worry about padding. */
+ if (0 != srclength) {
+ /* Get what's left: 1 or 2 trailing bytes, zero-filled to a group. */
+ input[0] = input[1] = input[2] = '\0';
+ for (i = 0; i < srclength; i++)
+ input[i] = *src++;
+
+ output[0] = input[0] >> 2;
+ output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
+ output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
+
+ if (datalength + 4 > targsize)
+ return (-1);
+ target[datalength++] = Base64[output[0]];
+ target[datalength++] = Base64[output[1]];
+ /* 1 leftover byte -> "XX==", 2 leftover bytes -> "XXX=" (RFC 1521) */
+ if (srclength == 1)
+ target[datalength++] = Pad64;
+ else
+ target[datalength++] = Base64[output[2]];
+ target[datalength++] = Pad64;
+ }
+ if (datalength >= targsize)
+ return (-1);
+ target[datalength] = '\0'; /* Returned value doesn't count \0. */
+ return (datalength);
+}
+
+/* skips all whitespace anywhere.
+ converts characters, four at a time, starting at (or after)
+ src from base - 64 numbers into three 8 bit bytes in the target area.
+ it returns the number of data bytes stored at the target, or -1 on error.
+ */
+
+int
+hwloc_decode_from_base64(char const *src, char *target, size_t targsize)
+{
+ /* state = number of base64 digits consumed in the current 4-digit group
+ * (each group of 4 digits decodes to 3 output bytes) */
+ unsigned int tarindex, state;
+ int ch;
+ char *pos;
+
+ state = 0;
+ tarindex = 0;
+
+ /* NOTE(review): isspace(ch) with a plain (possibly negative) char value
+ * is undefined for non-ASCII input; this is inherited OpenBSD code and
+ * left as-is here. */
+ while ((ch = *src++) != '\0') {
+ if (isspace(ch)) /* Skip whitespace anywhere. */
+ continue;
+
+ if (ch == Pad64)
+ break;
+
+ pos = strchr(Base64, ch);
+ if (pos == 0) /* A non-base64 character. */
+ return (-1);
+
+ /* (pos - Base64) is the 6-bit value of this digit; each state spreads
+ * those 6 bits across the current and/or next output byte. */
+ switch (state) {
+ case 0:
+ if (target) {
+ if (tarindex >= targsize)
+ return (-1);
+ target[tarindex] = (pos - Base64) << 2;
+ }
+ state = 1;
+ break;
+ case 1:
+ if (target) {
+ if (tarindex + 1 >= targsize)
+ return (-1);
+ target[tarindex] |= (pos - Base64) >> 4;
+ target[tarindex+1] = ((pos - Base64) & 0x0f)
+ << 4 ;
+ }
+ tarindex++;
+ state = 2;
+ break;
+ case 2:
+ if (target) {
+ if (tarindex + 1 >= targsize)
+ return (-1);
+ target[tarindex] |= (pos - Base64) >> 2;
+ target[tarindex+1] = ((pos - Base64) & 0x03)
+ << 6;
+ }
+ tarindex++;
+ state = 3;
+ break;
+ case 3:
+ if (target) {
+ if (tarindex >= targsize)
+ return (-1);
+ target[tarindex] |= (pos - Base64);
+ }
+ tarindex++;
+ state = 0;
+ break;
+ }
+ }
+
+ /*
+ * We are done decoding Base-64 chars. Let's see if we ended
+ * on a byte boundary, and/or with erroneous trailing characters.
+ */
+
+ if (ch == Pad64) { /* We got a pad char. */
+ ch = *src++; /* Skip it, get next. */
+ switch (state) {
+ case 0: /* Invalid = in first position */
+ case 1: /* Invalid = in second position */
+ return (-1);
+
+ case 2: /* Valid, means one byte of info */
+ /* Skip any number of spaces. */
+ for (; ch != '\0'; ch = *src++)
+ if (!isspace(ch))
+ break;
+ /* Make sure there is another trailing = sign. */
+ if (ch != Pad64)
+ return (-1);
+ ch = *src++; /* Skip the = */
+ /* Fall through to "single trailing =" case. */
+ /* FALLTHROUGH */
+
+ case 3: /* Valid, means two bytes of info */
+ /*
+ * We know this char is an =. Is there anything but
+ * whitespace after it?
+ */
+ for (; ch != '\0'; ch = *src++)
+ if (!isspace(ch))
+ return (-1);
+
+ /*
+ * Now make sure for cases 2 and 3 that the "extra"
+ * bits that slopped past the last full byte were
+ * zeros. If we don't check them, they become a
+ * subliminal channel.
+ */
+ if (target && target[tarindex] != 0)
+ return (-1);
+ }
+ } else {
+ /*
+ * We ended by seeing the end of the string. Make sure we
+ * have no partial bytes lying around.
+ */
+ if (state != 0)
+ return (-1);
+ }
+
+ return (tarindex);
+}
diff --git a/ext/hwloc/src/bind.c b/ext/hwloc/src/bind.c
new file mode 100644
index 000000000..37921bcee
--- /dev/null
+++ b/ext/hwloc/src/bind.c
@@ -0,0 +1,781 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2011 inria. All rights reserved.
+ * Copyright © 2009-2010, 2012 Université Bordeaux 1
+ * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+#include <private/autogen/config.h>
+#include <hwloc.h>
+#include <private/private.h>
+#include <hwloc/helper.h>
+#ifdef HAVE_SYS_MMAN_H
+# include <sys/mman.h>
+#endif
+/* <malloc.h> is only needed if we don't have posix_memalign() */
+#if defined(hwloc_getpagesize) && !defined(HAVE_POSIX_MEMALIGN) && defined(HAVE_MEMALIGN) && defined(HAVE_MALLOC_H)
+#include <malloc.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <errno.h>
+#include <stdlib.h>
+
+/* TODO: HWLOC_GNU_SYS, HWLOC_IRIX_SYS,
+ *
+ * IRIX: see MP_MUSTRUN / _DSM_MUSTRUN, pthread_setrunon_np, /hw, procss_cpulink, numa_create
+ *
+ * We could use glibc's sched_setaffinity generically when it is available
+ *
+ * Darwin and OpenBSD don't seem to have binding facilities.
+ */
+
+/* Sanity-check a caller-provided cpuset before binding.
+ * Returns the (possibly substituted) set to bind to, or NULL with errno set:
+ * EXDEV when the topology spans several systems (no unambiguous cpuset),
+ * EINVAL when the set is empty or reaches outside the complete cpuset. */
+static hwloc_const_bitmap_t
+hwloc_fix_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t set)
+{
+ hwloc_const_bitmap_t topology_set = hwloc_topology_get_topology_cpuset(topology);
+ hwloc_const_bitmap_t complete_set = hwloc_topology_get_complete_cpuset(topology);
+
+ if (!topology_set) {
+ /* The topology is composed of several systems, the cpuset is ambiguous. */
+ errno = EXDEV;
+ return NULL;
+ }
+
+ if (hwloc_bitmap_iszero(set)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ if (!hwloc_bitmap_isincluded(set, complete_set)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ /* request covers the whole topology cpuset: widen to the complete set,
+ * presumably so that "bind everywhere" truly clears any restriction
+ * (NOTE(review): confirm intent against hwloc upstream) */
+ if (hwloc_bitmap_isincluded(topology_set, set))
+ set = complete_set;
+
+ return set;
+}
+
+/* Bind the current process or thread (per flags) to `set`.
+ * With neither PROCESS nor THREAD requested, tries the process hook first,
+ * then the thread hook.  Returns -1 with errno=ENOSYS when no OS hook
+ * supports the requested binding. */
+int
+hwloc_set_cpubind(hwloc_topology_t topology, hwloc_const_bitmap_t set, int flags)
+{
+ set = hwloc_fix_cpubind(topology, set);
+ if (!set)
+ return -1;
+
+ if (flags & HWLOC_CPUBIND_PROCESS) {
+ if (topology->binding_hooks.set_thisproc_cpubind)
+ return topology->binding_hooks.set_thisproc_cpubind(topology, set, flags);
+ } else if (flags & HWLOC_CPUBIND_THREAD) {
+ if (topology->binding_hooks.set_thisthread_cpubind)
+ return topology->binding_hooks.set_thisthread_cpubind(topology, set, flags);
+ } else {
+ if (topology->binding_hooks.set_thisproc_cpubind)
+ return topology->binding_hooks.set_thisproc_cpubind(topology, set, flags);
+ else if (topology->binding_hooks.set_thisthread_cpubind)
+ return topology->binding_hooks.set_thisthread_cpubind(topology, set, flags);
+ }
+
+ errno = ENOSYS;
+ return -1;
+}
+
+/* Retrieve the current process/thread CPU binding into `set`.
+ * Same hook-dispatch and fallback order as hwloc_set_cpubind(). */
+int
+hwloc_get_cpubind(hwloc_topology_t topology, hwloc_bitmap_t set, int flags)
+{
+ if (flags & HWLOC_CPUBIND_PROCESS) {
+ if (topology->binding_hooks.get_thisproc_cpubind)
+ return topology->binding_hooks.get_thisproc_cpubind(topology, set, flags);
+ } else if (flags & HWLOC_CPUBIND_THREAD) {
+ if (topology->binding_hooks.get_thisthread_cpubind)
+ return topology->binding_hooks.get_thisthread_cpubind(topology, set, flags);
+ } else {
+ if (topology->binding_hooks.get_thisproc_cpubind)
+ return topology->binding_hooks.get_thisproc_cpubind(topology, set, flags);
+ else if (topology->binding_hooks.get_thisthread_cpubind)
+ return topology->binding_hooks.get_thisthread_cpubind(topology, set, flags);
+ }
+
+ errno = ENOSYS;
+ return -1;
+}
+
+/* Bind another process (pid) to `set`, after the usual cpuset sanitizing.
+ * Returns -1/ENOSYS when the OS provides no per-process binding hook. */
+int
+hwloc_set_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_bitmap_t set, int flags)
+{
+ set = hwloc_fix_cpubind(topology, set);
+ if (!set)
+ return -1;
+
+ if (topology->binding_hooks.set_proc_cpubind)
+ return topology->binding_hooks.set_proc_cpubind(topology, pid, set, flags);
+
+ errno = ENOSYS;
+ return -1;
+}
+
+/* Retrieve the CPU binding of another process (pid) into `set`,
+ * or fail with ENOSYS when unsupported on this OS. */
+int
+hwloc_get_proc_cpubind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_bitmap_t set, int flags)
+{
+ if (topology->binding_hooks.get_proc_cpubind)
+ return topology->binding_hooks.get_proc_cpubind(topology, pid, set, flags);
+
+ errno = ENOSYS;
+ return -1;
+}
+
+/* Per-thread variants: only compiled when the OS exposes a thread handle
+ * type (hwloc_thread_t defined). */
+#ifdef hwloc_thread_t
+/* Bind a specific thread (tid) to `set`; ENOSYS when unsupported. */
+int
+hwloc_set_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t tid, hwloc_const_bitmap_t set, int flags)
+{
+ set = hwloc_fix_cpubind(topology, set);
+ if (!set)
+ return -1;
+
+ if (topology->binding_hooks.set_thread_cpubind)
+ return topology->binding_hooks.set_thread_cpubind(topology, tid, set, flags);
+
+ errno = ENOSYS;
+ return -1;
+}
+
+/* Retrieve the CPU binding of a specific thread (tid) into `set`. */
+int
+hwloc_get_thread_cpubind(hwloc_topology_t topology, hwloc_thread_t tid, hwloc_bitmap_t set, int flags)
+{
+ if (topology->binding_hooks.get_thread_cpubind)
+ return topology->binding_hooks.get_thread_cpubind(topology, tid, set, flags);
+
+ errno = ENOSYS;
+ return -1;
+}
+#endif
+
+/* Fill `set` with the PU(s) where the current process/thread last ran.
+ * Same PROCESS/THREAD flag dispatch and fallback as the cpubind getters. */
+int
+hwloc_get_last_cpu_location(hwloc_topology_t topology, hwloc_bitmap_t set, int flags)
+{
+ if (flags & HWLOC_CPUBIND_PROCESS) {
+ if (topology->binding_hooks.get_thisproc_last_cpu_location)
+ return topology->binding_hooks.get_thisproc_last_cpu_location(topology, set, flags);
+ } else if (flags & HWLOC_CPUBIND_THREAD) {
+ if (topology->binding_hooks.get_thisthread_last_cpu_location)
+ return topology->binding_hooks.get_thisthread_last_cpu_location(topology, set, flags);
+ } else {
+ if (topology->binding_hooks.get_thisproc_last_cpu_location)
+ return topology->binding_hooks.get_thisproc_last_cpu_location(topology, set, flags);
+ else if (topology->binding_hooks.get_thisthread_last_cpu_location)
+ return topology->binding_hooks.get_thisthread_last_cpu_location(topology, set, flags);
+ }
+
+ errno = ENOSYS;
+ return -1;
+}
+
+/* Fill `set` with the PU(s) where another process (pid) last ran,
+ * or fail with ENOSYS when the OS hook is missing. */
+int
+hwloc_get_proc_last_cpu_location(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_bitmap_t set, int flags)
+{
+ if (topology->binding_hooks.get_proc_last_cpu_location)
+ return topology->binding_hooks.get_proc_last_cpu_location(topology, pid, set, flags);
+
+ errno = ENOSYS;
+ return -1;
+}
+
+/* Sanity-check a caller-provided nodeset before memory binding.
+ * Returns the nodeset to bind to (complete_nodeset when the whole topology
+ * was requested), or NULL with errno set: EXDEV for multi-system topologies,
+ * ENODEV when there is no NUMA node, EINVAL for empty/out-of-range sets. */
+static hwloc_const_nodeset_t
+hwloc_fix_membind(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset)
+{
+ hwloc_const_bitmap_t topology_nodeset = hwloc_topology_get_topology_nodeset(topology);
+ hwloc_const_bitmap_t complete_nodeset = hwloc_topology_get_complete_nodeset(topology);
+
+ if (!hwloc_topology_get_topology_cpuset(topology)) {
+ /* The topology is composed of several systems, the nodeset is thus
+ * ambiguous. */
+ errno = EXDEV;
+ return NULL;
+ }
+
+ if (!complete_nodeset) {
+ /* There is no NUMA node */
+ errno = ENODEV;
+ return NULL;
+ }
+
+ if (hwloc_bitmap_iszero(nodeset)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ if (!hwloc_bitmap_isincluded(nodeset, complete_nodeset)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ /* whole topology requested: widen to the complete nodeset (mirrors
+ * hwloc_fix_cpubind's handling of the complete cpuset) */
+ if (hwloc_bitmap_isincluded(topology_nodeset, nodeset))
+ return complete_nodeset;
+
+ return nodeset;
+}
+
+/* Convert a caller-provided cpuset into the nodeset to bind memory to.
+ * Fills `nodeset` and returns 0, or returns -1 with errno set (EXDEV,
+ * ENODEV, EINVAL — same meanings as in hwloc_fix_membind()). */
+static int
+hwloc_fix_membind_cpuset(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_const_cpuset_t cpuset)
+{
+ hwloc_const_bitmap_t topology_set = hwloc_topology_get_topology_cpuset(topology);
+ hwloc_const_bitmap_t complete_set = hwloc_topology_get_complete_cpuset(topology);
+ hwloc_const_bitmap_t complete_nodeset = hwloc_topology_get_complete_nodeset(topology);
+
+ if (!topology_set) {
+ /* The topology is composed of several systems, the cpuset is thus
+ * ambiguous. */
+ errno = EXDEV;
+ return -1;
+ }
+
+ if (!complete_nodeset) {
+ /* There is no NUMA node */
+ errno = ENODEV;
+ return -1;
+ }
+
+ if (hwloc_bitmap_iszero(cpuset)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (!hwloc_bitmap_isincluded(cpuset, complete_set)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* whole topology requested: bind to all NUMA nodes */
+ if (hwloc_bitmap_isincluded(topology_set, cpuset)) {
+ hwloc_bitmap_copy(nodeset, complete_nodeset);
+ return 0;
+ }
+
+ /* otherwise translate the cpuset into the covering nodeset */
+ hwloc_cpuset_to_nodeset(topology, cpuset, nodeset);
+ return 0;
+}
+
+/* Bind the current process/thread memory to `nodeset` with `policy`.
+ * PROCESS/THREAD flag dispatch with process-then-thread fallback,
+ * mirroring hwloc_set_cpubind(); -1/ENOSYS when no hook exists. */
+int
+hwloc_set_membind_nodeset(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+ nodeset = hwloc_fix_membind(topology, nodeset);
+ if (!nodeset)
+ return -1;
+
+ if (flags & HWLOC_MEMBIND_PROCESS) {
+ if (topology->binding_hooks.set_thisproc_membind)
+ return topology->binding_hooks.set_thisproc_membind(topology, nodeset, policy, flags);
+ } else if (flags & HWLOC_MEMBIND_THREAD) {
+ if (topology->binding_hooks.set_thisthread_membind)
+ return topology->binding_hooks.set_thisthread_membind(topology, nodeset, policy, flags);
+ } else {
+ if (topology->binding_hooks.set_thisproc_membind)
+ return topology->binding_hooks.set_thisproc_membind(topology, nodeset, policy, flags);
+ else if (topology->binding_hooks.set_thisthread_membind)
+ return topology->binding_hooks.set_thisthread_membind(topology, nodeset, policy, flags);
+ }
+
+ errno = ENOSYS;
+ return -1;
+}
+
+/* Cpuset flavor of hwloc_set_membind_nodeset(): translate the cpuset to a
+ * nodeset first, then delegate.  Temporary nodeset is freed on all paths. */
+int
+hwloc_set_membind(hwloc_topology_t topology, hwloc_const_cpuset_t set, hwloc_membind_policy_t policy, int flags)
+{
+ hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
+ int ret;
+
+ if (hwloc_fix_membind_cpuset(topology, nodeset, set))
+ ret = -1;
+ else
+ ret = hwloc_set_membind_nodeset(topology, nodeset, policy, flags);
+
+ hwloc_bitmap_free(nodeset);
+ return ret;
+}
+
+/* Retrieve the current process/thread memory binding (nodeset + policy).
+ * Same flag dispatch and fallback as hwloc_set_membind_nodeset(). */
+int
+hwloc_get_membind_nodeset(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
+{
+ if (flags & HWLOC_MEMBIND_PROCESS) {
+ if (topology->binding_hooks.get_thisproc_membind)
+ return topology->binding_hooks.get_thisproc_membind(topology, nodeset, policy, flags);
+ } else if (flags & HWLOC_MEMBIND_THREAD) {
+ if (topology->binding_hooks.get_thisthread_membind)
+ return topology->binding_hooks.get_thisthread_membind(topology, nodeset, policy, flags);
+ } else {
+ if (topology->binding_hooks.get_thisproc_membind)
+ return topology->binding_hooks.get_thisproc_membind(topology, nodeset, policy, flags);
+ else if (topology->binding_hooks.get_thisthread_membind)
+ return topology->binding_hooks.get_thisthread_membind(topology, nodeset, policy, flags);
+ }
+
+ errno = ENOSYS;
+ return -1;
+}
+
+/* Cpuset flavor of hwloc_get_membind_nodeset(): query the nodeset binding,
+ * then convert it back to a cpuset on success. */
+int
+hwloc_get_membind(hwloc_topology_t topology, hwloc_cpuset_t set, hwloc_membind_policy_t * policy, int flags)
+{
+ hwloc_nodeset_t nodeset;
+ int ret;
+
+ nodeset = hwloc_bitmap_alloc();
+ ret = hwloc_get_membind_nodeset(topology, nodeset, policy, flags);
+
+ if (!ret)
+ hwloc_cpuset_from_nodeset(topology, set, nodeset);
+
+ hwloc_bitmap_free(nodeset);
+ return ret;
+}
+
+/* Bind another process' (pid) memory to `nodeset` with `policy`;
+ * -1/ENOSYS when the OS hook is missing. */
+int
+hwloc_set_proc_membind_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+ nodeset = hwloc_fix_membind(topology, nodeset);
+ if (!nodeset)
+ return -1;
+
+ if (topology->binding_hooks.set_proc_membind)
+ return topology->binding_hooks.set_proc_membind(topology, pid, nodeset, policy, flags);
+
+ errno = ENOSYS;
+ return -1;
+}
+
+
+int
+hwloc_set_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_const_cpuset_t set, hwloc_membind_policy_t policy, int flags)
+{
+ hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
+ int ret;
+
+ if (hwloc_fix_membind_cpuset(topology, nodeset, set))
+ ret = -1;
+ else
+ ret = hwloc_set_proc_membind_nodeset(topology, pid, nodeset, policy, flags);
+
+ hwloc_bitmap_free(nodeset);
+ return ret;
+}
+
+int
+hwloc_get_proc_membind_nodeset(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
+{
+ if (topology->binding_hooks.get_proc_membind)
+ return topology->binding_hooks.get_proc_membind(topology, pid, nodeset, policy, flags);
+
+ errno = ENOSYS;
+ return -1;
+}
+
+int
+hwloc_get_proc_membind(hwloc_topology_t topology, hwloc_pid_t pid, hwloc_cpuset_t set, hwloc_membind_policy_t * policy, int flags)
+{
+ hwloc_nodeset_t nodeset;
+ int ret;
+
+ nodeset = hwloc_bitmap_alloc();
+ ret = hwloc_get_proc_membind_nodeset(topology, pid, nodeset, policy, flags);
+
+ if (!ret)
+ hwloc_cpuset_from_nodeset(topology, set, nodeset);
+
+ hwloc_bitmap_free(nodeset);
+ return ret;
+}
+
+int
+hwloc_set_area_membind_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+ nodeset = hwloc_fix_membind(topology, nodeset);
+ if (!nodeset)
+ return -1;
+
+ if (topology->binding_hooks.set_area_membind)
+ return topology->binding_hooks.set_area_membind(topology, addr, len, nodeset, policy, flags);
+
+ errno = ENOSYS;
+ return -1;
+}
+
+int
+hwloc_set_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_const_cpuset_t set, hwloc_membind_policy_t policy, int flags)
+{
+ hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
+ int ret;
+
+ if (hwloc_fix_membind_cpuset(topology, nodeset, set))
+ ret = -1;
+ else
+ ret = hwloc_set_area_membind_nodeset(topology, addr, len, nodeset, policy, flags);
+
+ hwloc_bitmap_free(nodeset);
+ return ret;
+}
+
+int
+hwloc_get_area_membind_nodeset(hwloc_topology_t topology, const void *addr, size_t len, hwloc_nodeset_t nodeset, hwloc_membind_policy_t * policy, int flags)
+{
+ if (topology->binding_hooks.get_area_membind)
+ return topology->binding_hooks.get_area_membind(topology, addr, len, nodeset, policy, flags);
+
+ errno = ENOSYS;
+ return -1;
+}
+
+int
+hwloc_get_area_membind(hwloc_topology_t topology, const void *addr, size_t len, hwloc_cpuset_t set, hwloc_membind_policy_t * policy, int flags)
+{
+ hwloc_nodeset_t nodeset;
+ int ret;
+
+ nodeset = hwloc_bitmap_alloc();
+ ret = hwloc_get_area_membind_nodeset(topology, addr, len, nodeset, policy, flags);
+
+ if (!ret)
+ hwloc_cpuset_from_nodeset(topology, set, nodeset);
+
+ hwloc_bitmap_free(nodeset);
+ return ret;
+}
+
/* Allocate len bytes of heap memory, page-aligned when an aligned allocator
 * is available at configure time; falls back to plain malloc() otherwise.
 * Returns NULL on failure. */
void *
hwloc_alloc_heap(hwloc_topology_t topology __hwloc_attribute_unused, size_t len)
{
  void *p;
#if defined(hwloc_getpagesize) && defined(HAVE_POSIX_MEMALIGN)
  /* posix_memalign() returns the error code instead of setting errno itself */
  errno = posix_memalign(&p, hwloc_getpagesize(), len);
  if (errno)
    p = NULL;
#elif defined(hwloc_getpagesize) && defined(HAVE_MEMALIGN)
  p = memalign(hwloc_getpagesize(), len);
#else
  p = malloc(len);
#endif
  return p;
}
+
#ifdef MAP_ANONYMOUS
/* Allocate len bytes through a private anonymous mapping (page-aligned, zeroed).
 * NOTE(review): mmap() reports failure with MAP_FAILED, not NULL — confirm callers
 * of this hook expect that convention. */
void *
hwloc_alloc_mmap(hwloc_topology_t topology __hwloc_attribute_unused, size_t len)
{
  return mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
}
#endif
+
/* Release memory obtained from hwloc_alloc_heap(); len is unused for heap memory.
 * Always succeeds. */
int
hwloc_free_heap(hwloc_topology_t topology __hwloc_attribute_unused, void *addr, size_t len __hwloc_attribute_unused)
{
  free(addr);
  return 0;
}
+
#ifdef MAP_ANONYMOUS
/* Release memory obtained from hwloc_alloc_mmap(); NULL is accepted as a no-op.
 * Returns munmap()'s result (0 on success, -1 with errno set on failure). */
int
hwloc_free_mmap(hwloc_topology_t topology __hwloc_attribute_unused, void *addr, size_t len)
{
  if (!addr)
    return 0;
  return munmap(addr, len);
}
#endif
+
+void *
+hwloc_alloc(hwloc_topology_t topology, size_t len)
+{
+ if (topology->binding_hooks.alloc)
+ return topology->binding_hooks.alloc(topology, len);
+ return hwloc_alloc_heap(topology, len);
+}
+
+void *
+hwloc_alloc_membind_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+ void *p;
+ nodeset = hwloc_fix_membind(topology, nodeset);
+ if (!nodeset)
+ goto fallback;
+ if (flags & HWLOC_MEMBIND_MIGRATE) {
+ errno = EINVAL;
+ goto fallback;
+ }
+
+ if (topology->binding_hooks.alloc_membind)
+ return topology->binding_hooks.alloc_membind(topology, len, nodeset, policy, flags);
+ else if (topology->binding_hooks.set_area_membind) {
+ p = hwloc_alloc(topology, len);
+ if (!p)
+ return NULL;
+ if (topology->binding_hooks.set_area_membind(topology, p, len, nodeset, policy, flags) && flags & HWLOC_MEMBIND_STRICT) {
+ int error = errno;
+ free(p);
+ errno = error;
+ return NULL;
+ }
+ return p;
+ } else {
+ errno = ENOSYS;
+ }
+
+fallback:
+ if (flags & HWLOC_MEMBIND_STRICT)
+ /* Report error */
+ return NULL;
+ /* Never mind, allocate anyway */
+ return hwloc_alloc(topology, len);
+}
+
+void *
+hwloc_alloc_membind(hwloc_topology_t topology, size_t len, hwloc_const_cpuset_t set, hwloc_membind_policy_t policy, int flags)
+{
+ hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
+ void *ret;
+
+ if (hwloc_fix_membind_cpuset(topology, nodeset, set)) {
+ if (flags & HWLOC_MEMBIND_STRICT)
+ ret = NULL;
+ else
+ ret = hwloc_alloc(topology, len);
+ } else
+ ret = hwloc_alloc_membind_nodeset(topology, len, nodeset, policy, flags);
+
+ hwloc_bitmap_free(nodeset);
+ return ret;
+}
+
+int
+hwloc_free(hwloc_topology_t topology, void *addr, size_t len)
+{
+ if (topology->binding_hooks.free_membind)
+ return topology->binding_hooks.free_membind(topology, addr, len);
+ return hwloc_free_heap(topology, addr, len);
+}
+
+/*
+ * Empty binding hooks always returning success
+ */
+
+static int dontset_return_complete_cpuset(hwloc_topology_t topology, hwloc_cpuset_t set)
+{
+ hwloc_const_cpuset_t cpuset = hwloc_topology_get_complete_cpuset(topology);
+ if (cpuset) {
+ hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
+ return 0;
+ } else
+ return -1;
+}
+
/* Dummy cpubind hooks used when the topology does not describe the current
 * system: setters pretend to succeed without doing anything, getters report
 * the complete cpuset as the current binding. */
static int dontset_thisthread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
{
  return 0;
}
static int dontget_thisthread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_bitmap_t set, int flags __hwloc_attribute_unused)
{
  return dontset_return_complete_cpuset(topology, set);
}
static int dontset_thisproc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
{
  return 0;
}
static int dontget_thisproc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_bitmap_t set, int flags __hwloc_attribute_unused)
{
  return dontset_return_complete_cpuset(topology, set);
}
static int dontset_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t pid __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
{
  return 0;
}
static int dontget_proc_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t pid __hwloc_attribute_unused, hwloc_bitmap_t cpuset, int flags __hwloc_attribute_unused)
{
  return dontset_return_complete_cpuset(topology, cpuset);
}
/* thread variants only exist when the platform defines hwloc_thread_t */
#ifdef hwloc_thread_t
static int dontset_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t tid __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
{
  return 0;
}
static int dontget_thread_cpubind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_thread_t tid __hwloc_attribute_unused, hwloc_bitmap_t cpuset, int flags __hwloc_attribute_unused)
{
  return dontset_return_complete_cpuset(topology, cpuset);
}
#endif
+
+static int dontset_return_complete_nodeset(hwloc_topology_t topology, hwloc_nodeset_t set, hwloc_membind_policy_t *policy)
+{
+ hwloc_const_nodeset_t nodeset = hwloc_topology_get_complete_nodeset(topology);
+ if (nodeset) {
+ hwloc_bitmap_copy(set, hwloc_topology_get_complete_nodeset(topology));
+ *policy = HWLOC_MEMBIND_DEFAULT;
+ return 0;
+ } else
+ return -1;
+}
+
/* Dummy membind hooks used when the topology does not describe the current
 * system: setters pretend to succeed, getters report the complete nodeset
 * with the DEFAULT policy, and allocation falls back to plain malloc/free. */
static int dontset_thisproc_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, hwloc_membind_policy_t policy __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
{
  return 0;
}
static int dontget_thisproc_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags __hwloc_attribute_unused)
{
  return dontset_return_complete_nodeset(topology, set, policy);
}

static int dontset_thisthread_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, hwloc_membind_policy_t policy __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
{
  return 0;
}
static int dontget_thisthread_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags __hwloc_attribute_unused)
{
  return dontset_return_complete_nodeset(topology, set, policy);
}

static int dontset_proc_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t pid __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, hwloc_membind_policy_t policy __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
{
  return 0;
}
static int dontget_proc_membind(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_pid_t pid __hwloc_attribute_unused, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags __hwloc_attribute_unused)
{
  return dontset_return_complete_nodeset(topology, set, policy);
}

static int dontset_area_membind(hwloc_topology_t topology __hwloc_attribute_unused, const void *addr __hwloc_attribute_unused, size_t size __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, hwloc_membind_policy_t policy __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
{
  return 0;
}
static int dontget_area_membind(hwloc_topology_t topology __hwloc_attribute_unused, const void *addr __hwloc_attribute_unused, size_t size __hwloc_attribute_unused, hwloc_bitmap_t set, hwloc_membind_policy_t * policy, int flags __hwloc_attribute_unused)
{
  return dontset_return_complete_nodeset(topology, set, policy);
}

/* unbound allocation: plain heap memory, ignoring the requested nodeset */
static void * dontalloc_membind(hwloc_topology_t topology __hwloc_attribute_unused, size_t size __hwloc_attribute_unused, hwloc_const_bitmap_t set __hwloc_attribute_unused, hwloc_membind_policy_t policy __hwloc_attribute_unused, int flags __hwloc_attribute_unused)
{
  return malloc(size);
}
static int dontfree_membind(hwloc_topology_t topology __hwloc_attribute_unused, void *addr __hwloc_attribute_unused, size_t size __hwloc_attribute_unused)
{
  free(addr);
  return 0;
}
+
/* Install the dummy hooks above for every binding operation.
 * Used when the topology does not describe the current system, so binding
 * calls succeed without returning ENOSYS but have no actual effect.
 * The support structure is deliberately left untouched (see caller). */
static void hwloc_set_dummy_hooks(struct hwloc_binding_hooks *hooks,
         struct hwloc_topology_support *support __hwloc_attribute_unused)
{
  hooks->set_thisproc_cpubind = dontset_thisproc_cpubind;
  hooks->get_thisproc_cpubind = dontget_thisproc_cpubind;
  hooks->set_thisthread_cpubind = dontset_thisthread_cpubind;
  hooks->get_thisthread_cpubind = dontget_thisthread_cpubind;
  hooks->set_proc_cpubind = dontset_proc_cpubind;
  hooks->get_proc_cpubind = dontget_proc_cpubind;
#ifdef hwloc_thread_t
  hooks->set_thread_cpubind = dontset_thread_cpubind;
  hooks->get_thread_cpubind = dontget_thread_cpubind;
#endif
  hooks->get_thisproc_last_cpu_location = dontget_thisproc_cpubind; /* cpubind instead of last_cpu_location is ok */
  hooks->get_thisthread_last_cpu_location = dontget_thisthread_cpubind; /* cpubind instead of last_cpu_location is ok */
  hooks->get_proc_last_cpu_location = dontget_proc_cpubind; /* cpubind instead of last_cpu_location is ok */
  /* TODO: get_thread_last_cpu_location */
  hooks->set_thisproc_membind = dontset_thisproc_membind;
  hooks->get_thisproc_membind = dontget_thisproc_membind;
  hooks->set_thisthread_membind = dontset_thisthread_membind;
  hooks->get_thisthread_membind = dontget_thisthread_membind;
  hooks->set_proc_membind = dontset_proc_membind;
  hooks->get_proc_membind = dontget_proc_membind;
  hooks->set_area_membind = dontset_area_membind;
  hooks->get_area_membind = dontget_area_membind;
  hooks->alloc_membind = dontalloc_membind;
  hooks->free_membind = dontfree_membind;
}
+
/* Install the binding hooks of every OS backend compiled into this build.
 * At most one HWLOC_*_SYS macro is normally defined, so at most one
 * hwloc_set_*_hooks() call is actually compiled in. */
void
hwloc_set_native_binding_hooks(struct hwloc_binding_hooks *hooks, struct hwloc_topology_support *support)
{
#    ifdef HWLOC_LINUX_SYS
  hwloc_set_linuxfs_hooks(hooks, support);
#    endif /* HWLOC_LINUX_SYS */

#    ifdef HWLOC_BGQ_SYS
  hwloc_set_bgq_hooks(hooks, support);
#    endif /* HWLOC_BGQ_SYS */

#    ifdef HWLOC_AIX_SYS
  hwloc_set_aix_hooks(hooks, support);
#    endif /* HWLOC_AIX_SYS */

#    ifdef HWLOC_OSF_SYS
  hwloc_set_osf_hooks(hooks, support);
#    endif /* HWLOC_OSF_SYS */

#    ifdef HWLOC_SOLARIS_SYS
  hwloc_set_solaris_hooks(hooks, support);
#    endif /* HWLOC_SOLARIS_SYS */

#    ifdef HWLOC_WIN_SYS
  hwloc_set_windows_hooks(hooks, support);
#    endif /* HWLOC_WIN_SYS */

#    ifdef HWLOC_DARWIN_SYS
  hwloc_set_darwin_hooks(hooks, support);
#    endif /* HWLOC_DARWIN_SYS */

#    ifdef HWLOC_FREEBSD_SYS
  hwloc_set_freebsd_hooks(hooks, support);
#    endif /* HWLOC_FREEBSD_SYS */

#    ifdef HWLOC_NETBSD_SYS
  hwloc_set_netbsd_hooks(hooks, support);
#    endif /* HWLOC_NETBSD_SYS */

#    ifdef HWLOC_HPUX_SYS
  hwloc_set_hpux_hooks(hooks, support);
#    endif /* HWLOC_HPUX_SYS */
}
+
+/* If the represented system is actually not this system, use dummy binding hooks. */
+void
+hwloc_set_binding_hooks(struct hwloc_topology *topology)
+{
+ if (topology->is_thissystem) {
+ hwloc_set_native_binding_hooks(&topology->binding_hooks, &topology->support);
+ /* every hook not set above will return ENOSYS */
+ } else {
+ /* not this system, use dummy binding hooks that do nothing (but don't return ENOSYS) */
+ hwloc_set_dummy_hooks(&topology->binding_hooks, &topology->support);
+ }
+
+ /* if not is_thissystem, set_cpubind is fake
+ * and get_cpubind returns the whole system cpuset,
+ * so don't report that set/get_cpubind as supported
+ */
+ if (topology->is_thissystem) {
+#define DO(which,kind) \
+ if (topology->binding_hooks.kind) \
+ topology->support.which##bind->kind = 1;
+ DO(cpu,set_thisproc_cpubind);
+ DO(cpu,get_thisproc_cpubind);
+ DO(cpu,set_proc_cpubind);
+ DO(cpu,get_proc_cpubind);
+ DO(cpu,set_thisthread_cpubind);
+ DO(cpu,get_thisthread_cpubind);
+#ifdef hwloc_thread_t
+ DO(cpu,set_thread_cpubind);
+ DO(cpu,get_thread_cpubind);
+#endif
+ DO(cpu,get_thisproc_last_cpu_location);
+ DO(cpu,get_proc_last_cpu_location);
+ DO(cpu,get_thisthread_last_cpu_location);
+ DO(mem,set_thisproc_membind);
+ DO(mem,get_thisproc_membind);
+ DO(mem,set_thisthread_membind);
+ DO(mem,get_thisthread_membind);
+ DO(mem,set_proc_membind);
+ DO(mem,get_proc_membind);
+ DO(mem,set_area_membind);
+ DO(mem,get_area_membind);
+ DO(mem,alloc_membind);
+ }
+}
diff --git a/ext/hwloc/src/bitmap.c b/ext/hwloc/src/bitmap.c
new file mode 100644
index 000000000..39f4dbfe3
--- /dev/null
+++ b/ext/hwloc/src/bitmap.c
@@ -0,0 +1,1163 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2011 inria. All rights reserved.
+ * Copyright © 2009-2011 Université Bordeaux 1
+ * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
#include <private/autogen/config.h>
#include <hwloc/autogen/config.h>
#include <hwloc.h>
#include <private/misc.h>
#include <private/private.h>
#include <private/debug.h>

#include <stdarg.h>
#include <stdio.h>
#include <assert.h>
#include <errno.h>
#include <ctype.h>
+
+/* TODO
+ * - have a way to change the initial allocation size
+ * - preallocate inside the bitmap structure (so that the whole structure is a cacheline for instance)
+ * and allocate a dedicated array only later when reallocating larger
+ */
+
/* magic number */
#define HWLOC_BITMAP_MAGIC 0x20091007

/* actual opaque type internals:
 * a bitmap is a growable array of unsigned longs, plus an "infinite" flag
 * that gives the value of every bit beyond the allocated array. */
struct hwloc_bitmap_s {
  unsigned ulongs_count; /* how many ulong bitmasks are valid, >= 1 */
  unsigned ulongs_allocated; /* how many ulong bitmasks are allocated, >= ulongs_count */
  unsigned long *ulongs;
  int infinite; /* set to 1 if all bits beyond ulongs are set */
#ifdef HWLOC_DEBUG
  int magic;
#endif
};

/* overzealous check in debug-mode, not as powerful as valgrind but still useful */
#ifdef HWLOC_DEBUG
#define HWLOC__BITMAP_CHECK(set) do { \
  assert((set)->magic == HWLOC_BITMAP_MAGIC); \
  assert((set)->ulongs_count >= 1); \
  assert((set)->ulongs_allocated >= (set)->ulongs_count); \
} while (0)
#else
#define HWLOC__BITMAP_CHECK(set)
#endif

/* extract a subset from a set using an index or a cpu */
#define HWLOC_SUBBITMAP_INDEX(cpu) ((cpu)/(HWLOC_BITS_PER_LONG))
#define HWLOC_SUBBITMAP_CPU_ULBIT(cpu) ((cpu)%(HWLOC_BITS_PER_LONG))
/* Read from a bitmap ulong without knowing whether x is valid.
 * Writers should make sure that x is valid and modify set->ulongs[x] directly.
 */
#define HWLOC_SUBBITMAP_READULONG(set,x) ((x) < (set)->ulongs_count ? (set)->ulongs[x] : (set)->infinite ? HWLOC_SUBBITMAP_FULL : HWLOC_SUBBITMAP_ZERO)

/* predefined subset values */
#define HWLOC_SUBBITMAP_ZERO 0UL
#define HWLOC_SUBBITMAP_FULL (~0UL)
#define HWLOC_SUBBITMAP_ULBIT(bit) (1UL<<(bit))
#define HWLOC_SUBBITMAP_CPU(cpu) HWLOC_SUBBITMAP_ULBIT(HWLOC_SUBBITMAP_CPU_ULBIT(cpu))
#define HWLOC_SUBBITMAP_ULBIT_TO(bit) (HWLOC_SUBBITMAP_FULL>>(HWLOC_BITS_PER_LONG-1-(bit)))
#define HWLOC_SUBBITMAP_ULBIT_FROM(bit) (HWLOC_SUBBITMAP_FULL<<(bit))
#define HWLOC_SUBBITMAP_ULBIT_FROMTO(begin,end) (HWLOC_SUBBITMAP_ULBIT_TO(end) & HWLOC_SUBBITMAP_ULBIT_FROM(begin))
+
+struct hwloc_bitmap_s * hwloc_bitmap_alloc(void)
+{
+ struct hwloc_bitmap_s * set;
+
+ set = malloc(sizeof(struct hwloc_bitmap_s));
+ if (!set)
+ return NULL;
+
+ set->ulongs_count = 1;
+ set->ulongs_allocated = 64/sizeof(unsigned long);
+ set->ulongs = malloc(64);
+ if (!set->ulongs) {
+ free(set);
+ return NULL;
+ }
+
+ set->ulongs[0] = HWLOC_SUBBITMAP_ZERO;
+ set->infinite = 0;
+#ifdef HWLOC_DEBUG
+ set->magic = HWLOC_BITMAP_MAGIC;
+#endif
+ return set;
+}
+
+struct hwloc_bitmap_s * hwloc_bitmap_alloc_full(void)
+{
+ struct hwloc_bitmap_s * set = hwloc_bitmap_alloc();
+ if (set) {
+ set->infinite = 1;
+ set->ulongs[0] = HWLOC_SUBBITMAP_FULL;
+ }
+ return set;
+}
+
+void hwloc_bitmap_free(struct hwloc_bitmap_s * set)
+{
+ if (!set)
+ return;
+
+ HWLOC__BITMAP_CHECK(set);
+#ifdef HWLOC_DEBUG
+ set->magic = 0;
+#endif
+
+ free(set->ulongs);
+ free(set);
+}
+
+/* enlarge until it contains at least needed_count ulongs.
+ */
+static void
+hwloc_bitmap_enlarge_by_ulongs(struct hwloc_bitmap_s * set, unsigned needed_count)
+{
+ unsigned tmp = 1 << hwloc_flsl((unsigned long) needed_count - 1);
+ if (tmp > set->ulongs_allocated) {
+ set->ulongs = realloc(set->ulongs, tmp * sizeof(unsigned long));
+ assert(set->ulongs);
+ set->ulongs_allocated = tmp;
+ }
+}
+
+/* enlarge until it contains at least needed_count ulongs,
+ * and update new ulongs according to the infinite field.
+ */
+static void
+hwloc_bitmap_realloc_by_ulongs(struct hwloc_bitmap_s * set, unsigned needed_count)
+{
+ unsigned i;
+
+ HWLOC__BITMAP_CHECK(set);
+
+ if (needed_count <= set->ulongs_count)
+ return;
+
+ /* realloc larger if needed */
+ hwloc_bitmap_enlarge_by_ulongs(set, needed_count);
+
+ /* fill the newly allocated subset depending on the infinite flag */
+ for(i=set->ulongs_count; iulongs[i] = set->infinite ? HWLOC_SUBBITMAP_FULL : HWLOC_SUBBITMAP_ZERO;
+ set->ulongs_count = needed_count;
+}
+
+/* realloc until it contains at least cpu+1 bits */
+#define hwloc_bitmap_realloc_by_cpu_index(set, cpu) hwloc_bitmap_realloc_by_ulongs(set, ((cpu)/HWLOC_BITS_PER_LONG)+1)
+
+/* reset a bitmap to exactely the needed size.
+ * the caller must reinitialize all ulongs and the infinite flag later.
+ */
+static void
+hwloc_bitmap_reset_by_ulongs(struct hwloc_bitmap_s * set, unsigned needed_count)
+{
+ hwloc_bitmap_enlarge_by_ulongs(set, needed_count);
+ set->ulongs_count = needed_count;
+}
+
+/* reset until it contains exactly cpu+1 bits (roundup to a ulong).
+ * the caller must reinitialize all ulongs and the infinite flag later.
+ */
+#define hwloc_bitmap_reset_by_cpu_index(set, cpu) hwloc_bitmap_reset_by_ulongs(set, ((cpu)/HWLOC_BITS_PER_LONG)+1)
+
+struct hwloc_bitmap_s * hwloc_bitmap_dup(const struct hwloc_bitmap_s * old)
+{
+ struct hwloc_bitmap_s * new;
+
+ if (!old)
+ return NULL;
+
+ HWLOC__BITMAP_CHECK(old);
+
+ new = malloc(sizeof(struct hwloc_bitmap_s));
+ if (!new)
+ return NULL;
+
+ new->ulongs = malloc(old->ulongs_allocated * sizeof(unsigned long));
+ if (!new->ulongs) {
+ free(new);
+ return NULL;
+ }
+ new->ulongs_allocated = old->ulongs_allocated;
+ new->ulongs_count = old->ulongs_count;
+ memcpy(new->ulongs, old->ulongs, new->ulongs_count * sizeof(unsigned long));
+ new->infinite = old->infinite;
+#ifdef HWLOC_DEBUG
+ new->magic = HWLOC_BITMAP_MAGIC;
+#endif
+ return new;
+}
+
+void hwloc_bitmap_copy(struct hwloc_bitmap_s * dst, const struct hwloc_bitmap_s * src)
+{
+ HWLOC__BITMAP_CHECK(dst);
+ HWLOC__BITMAP_CHECK(src);
+
+ hwloc_bitmap_reset_by_ulongs(dst, src->ulongs_count);
+
+ memcpy(dst->ulongs, src->ulongs, src->ulongs_count * sizeof(unsigned long));
+ dst->infinite = src->infinite;
+}
+
+/* Strings always use 32bit groups */
+#define HWLOC_PRIxSUBBITMAP "%08lx"
+#define HWLOC_BITMAP_SUBSTRING_SIZE 32
+#define HWLOC_BITMAP_SUBSTRING_LENGTH (HWLOC_BITMAP_SUBSTRING_SIZE/4)
+#define HWLOC_BITMAP_STRING_PER_LONG (HWLOC_BITS_PER_LONG/HWLOC_BITMAP_SUBSTRING_SIZE)
+
/* Print set into buf as comma-separated 32-bit hex groups, most significant
 * group first; an infinite bitmap gets a "0xf...f" prefix.
 * Returns the number of characters that would have been written
 * (snprintf convention, so >= buflen means truncation), or -1 on error. */
int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, const struct hwloc_bitmap_s * __hwloc_restrict set)
{
  ssize_t size = buflen;
  char *tmp = buf;
  int res, ret = 0;
  int needcomma = 0;
  int i;
  unsigned long accum = 0;   /* ulong currently being split into 32-bit groups */
  int accumed = 0;           /* how many bits of accum are still to be printed */
#if HWLOC_BITS_PER_LONG == HWLOC_BITMAP_SUBSTRING_SIZE
  const unsigned long accum_mask = ~0UL;
#else /* HWLOC_BITS_PER_LONG != HWLOC_BITMAP_SUBSTRING_SIZE */
  /* selects the highest 32-bit group of a ulong */
  const unsigned long accum_mask = ((1UL << HWLOC_BITMAP_SUBSTRING_SIZE) - 1) << (HWLOC_BITS_PER_LONG - HWLOC_BITMAP_SUBSTRING_SIZE);
#endif /* HWLOC_BITS_PER_LONG != HWLOC_BITMAP_SUBSTRING_SIZE */

  HWLOC__BITMAP_CHECK(set);

  /* mark the end in case we do nothing later */
  if (buflen > 0)
    tmp[0] = '\0';

  if (set->infinite) {
    res = hwloc_snprintf(tmp, size, "0xf...f");
    needcomma = 1;
    if (res < 0)
      return -1;
    ret += res;
    if (res >= size)
      res = size>0 ? size - 1 : 0;
    tmp += res;
    size -= res;
    /* optimize a common case: full bitmap should appear as 0xf...f instead of 0xf...f,0xffffffff */
    if (set->ulongs_count == 1 && set->ulongs[0] == HWLOC_SUBBITMAP_FULL)
      return ret;
  }

  i=set->ulongs_count-1;
  while (i>=0 || accumed) {
    /* Refill accumulator */
    if (!accumed) {
      accum = set->ulongs[i--];
      accumed = HWLOC_BITS_PER_LONG;
    }

    if (accum & accum_mask) {
      /* print the whole subset if not empty */
      res = hwloc_snprintf(tmp, size, needcomma ? ",0x" HWLOC_PRIxSUBBITMAP : "0x" HWLOC_PRIxSUBBITMAP,
        (accum & accum_mask) >> (HWLOC_BITS_PER_LONG - HWLOC_BITMAP_SUBSTRING_SIZE));
      needcomma = 1;
    } else if (i == -1 && accumed == HWLOC_BITMAP_SUBSTRING_SIZE) {
      /* print a single 0 to mark the last subset */
      res = hwloc_snprintf(tmp, size, needcomma ? ",0x0" : "0x0");
    } else if (needcomma) {
      res = hwloc_snprintf(tmp, size, ",");
    } else {
      res = 0;
    }
    if (res < 0)
      return -1;
    ret += res;

#if HWLOC_BITS_PER_LONG == HWLOC_BITMAP_SUBSTRING_SIZE
    accum = 0;
    accumed = 0;
#else
    /* shift the next 32-bit group into accum_mask's position */
    accum <<= HWLOC_BITMAP_SUBSTRING_SIZE;
    accumed -= HWLOC_BITMAP_SUBSTRING_SIZE;
#endif

    /* clamp res so tmp/size stay inside the buffer after truncation */
    if (res >= size)
      res = size>0 ? size - 1 : 0;

    tmp += res;
    size -= res;
  }

  return ret;
}
+
+int hwloc_bitmap_asprintf(char ** strp, const struct hwloc_bitmap_s * __hwloc_restrict set)
+{
+ int len;
+ char *buf;
+
+ HWLOC__BITMAP_CHECK(set);
+
+ len = hwloc_bitmap_snprintf(NULL, 0, set);
+ buf = malloc(len+1);
+ *strp = buf;
+ return hwloc_bitmap_snprintf(buf, len+1, set);
+}
+
/* Parse the comma-separated hex format produced by hwloc_bitmap_snprintf().
 * Accepts an optional "0xf...f," prefix marking an infinite bitmap.
 * Returns 0 on success; on parse failure, zeroes the bitmap and returns -1. */
int hwloc_bitmap_sscanf(struct hwloc_bitmap_s *set, const char * __hwloc_restrict string)
{
  const char * current = string;
  unsigned long accum = 0;   /* ulong being assembled from 32-bit groups */
  int count=0;               /* number of 32-bit substrings remaining */
  int infinite = 0;

  /* count how many substrings there are */
  count++;
  while ((current = strchr(current+1, ',')) != NULL)
    count++;

  current = string;
  if (!strncmp("0xf...f", current, 7)) {
    current += 7;
    if (*current != ',') {
      /* special case for infinite/full bitmap */
      hwloc_bitmap_fill(set);
      return 0;
    }
    current++;
    infinite = 1;
    count--;
  }

  /* size the bitmap for the substrings (rounded up to whole ulongs) */
  hwloc_bitmap_reset_by_ulongs(set, (count + HWLOC_BITMAP_STRING_PER_LONG - 1) / HWLOC_BITMAP_STRING_PER_LONG);
  set->infinite = 0;

  while (*current != '\0') {
    unsigned long val;
    char *next;
    val = strtoul(current, &next, 16);

    assert(count > 0);
    count--;

    /* substrings are most-significant first: place this group at its offset */
    accum |= (val << ((count * HWLOC_BITMAP_SUBSTRING_SIZE) % HWLOC_BITS_PER_LONG));
    if (!(count % HWLOC_BITMAP_STRING_PER_LONG)) {
      /* a whole ulong has been assembled, store it */
      set->ulongs[count / HWLOC_BITMAP_STRING_PER_LONG] = accum;
      accum = 0;
    }

    if (*next != ',') {
      /* trailing garbage or too few substrings is a parse error */
      if (*next || count > 0)
        goto failed;
      else
        break;
    }
    current = (const char*) next+1;
  }

  set->infinite = infinite; /* set at the end, to avoid spurious realloc with filled new ulongs */

  return 0;

 failed:
  /* failure to parse */
  hwloc_bitmap_zero(set);
  return -1;
}
+
/* Print set as a comma-separated list of indices and ranges, e.g. "0,2-5,8-"
 * (a trailing dash marks a range extending to infinity).
 * Returns the number of characters that would have been written
 * (snprintf convention), or -1 on error. */
int hwloc_bitmap_list_snprintf(char * __hwloc_restrict buf, size_t buflen, const struct hwloc_bitmap_s * __hwloc_restrict set)
{
  int prev = -1;
  hwloc_bitmap_t reverse;
  ssize_t size = buflen;
  char *tmp = buf;
  int res, ret = 0;
  int needcomma = 0;

  HWLOC__BITMAP_CHECK(set);

  /* the complement is used to find where each run of set bits ends */
  reverse = hwloc_bitmap_alloc(); /* FIXME: add hwloc_bitmap_alloc_size() + hwloc_bitmap_init_allocated() to avoid malloc? */
  hwloc_bitmap_not(reverse, set);

  /* mark the end in case we do nothing later */
  if (buflen > 0)
    tmp[0] = '\0';

  while (1) {
    int begin, end;

    /* next set bit starts a range; next cleared bit after it ends the range */
    begin = hwloc_bitmap_next(set, prev);
    if (begin == -1)
      break;
    end = hwloc_bitmap_next(reverse, begin);

    if (end == begin+1) {
      /* singleton */
      res = hwloc_snprintf(tmp, size, needcomma ? ",%d" : "%d", begin);
    } else if (end == -1) {
      /* open-ended range */
      res = hwloc_snprintf(tmp, size, needcomma ? ",%d-" : "%d-", begin);
    } else {
      res = hwloc_snprintf(tmp, size, needcomma ? ",%d-%d" : "%d-%d", begin, end-1);
    }
    if (res < 0) {
      hwloc_bitmap_free(reverse);
      return -1;
    }
    ret += res;

    /* clamp res so tmp/size stay inside the buffer after truncation */
    if (res >= size)
      res = size>0 ? size - 1 : 0;

    tmp += res;
    size -= res;
    needcomma = 1;

    if (end == -1)
      break;
    else
      prev = end - 1;
  }

  hwloc_bitmap_free(reverse);

  return ret;
}
+
+int hwloc_bitmap_list_asprintf(char ** strp, const struct hwloc_bitmap_s * __hwloc_restrict set)
+{
+ int len;
+ char *buf;
+
+ HWLOC__BITMAP_CHECK(set);
+
+ len = hwloc_bitmap_list_snprintf(NULL, 0, set);
+ buf = malloc(len+1);
+ *strp = buf;
+ return hwloc_bitmap_list_snprintf(buf, len+1, set);
+}
+
/* Parse the index/range list format produced by hwloc_bitmap_list_snprintf()
 * (e.g. "0,2-5,8-"; a trailing dash means an infinite range).
 * Returns 0 on success; on parse failure, zeroes the bitmap and returns -1. */
int hwloc_bitmap_list_sscanf(struct hwloc_bitmap_s *set, const char * __hwloc_restrict string)
{
  const char * current = string;
  char *next;
  long begin = -1, val;  /* begin != -1 while the end of a range is pending */

  hwloc_bitmap_zero(set);

  while (*current != '\0') {

    /* ignore empty ranges */
    while (*current == ',')
      current++;

    val = strtoul(current, &next, 0);
    /* make sure we got at least one digit */
    if (next == current)
      goto failed;

    if (begin != -1) {
      /* finishing a range */
      hwloc_bitmap_set_range(set, begin, val);
      begin = -1;

    } else if (*next == '-') {
      /* starting a new range */
      if (*(next+1) == '\0') {
        /* infinite range */
        hwloc_bitmap_set_range(set, val, -1);
        break;
      } else {
        /* normal range */
        begin = val;
      }

    } else if (*next == ',' || *next == '\0') {
      /* single digit */
      hwloc_bitmap_set(set, val);
    }

    if (*next == '\0')
      break;
    current = next+1;
  }

  return 0;

 failed:
  /* failure to parse */
  hwloc_bitmap_zero(set);
  return -1;
}
+
/* Print set in taskset(1) format: one continuous hex number ("0x...") with
 * no comma-separated groups; an infinite bitmap gets a "0xf...f" prefix.
 * Returns the number of characters that would have been written
 * (snprintf convention), or -1 on error. */
int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, const struct hwloc_bitmap_s * __hwloc_restrict set)
{
  ssize_t size = buflen;
  char *tmp = buf;
  int res, ret = 0;
  int started = 0;  /* becomes 1 once the leading "0x" has been emitted */
  int i;

  HWLOC__BITMAP_CHECK(set);

  /* mark the end in case we do nothing later */
  if (buflen > 0)
    tmp[0] = '\0';

  if (set->infinite) {
    res = hwloc_snprintf(tmp, size, "0xf...f");
    started = 1;
    if (res < 0)
      return -1;
    ret += res;
    if (res >= size)
      res = size>0 ? size - 1 : 0;
    tmp += res;
    size -= res;
    /* optimize a common case: full bitmap should appear as 0xf...f instead of 0xf...fffffffff */
    if (set->ulongs_count == 1 && set->ulongs[0] == HWLOC_SUBBITMAP_FULL)
      return ret;
  }

  i=set->ulongs_count-1;
  while (i>=0) {
    unsigned long val = set->ulongs[i--];
    if (started) {
      /* print the whole subset, zero-padded since we're mid-number */
#if HWLOC_BITS_PER_LONG == 64
      res = hwloc_snprintf(tmp, size, "%016lx", val);
#else
      res = hwloc_snprintf(tmp, size, "%08lx", val);
#endif
    } else if (val || i == -1) {
      /* first non-zero ulong (or the last one): no padding, add "0x" */
      res = hwloc_snprintf(tmp, size, "0x%lx", val);
      started = 1;
    } else {
      /* skip leading zero ulongs entirely */
      res = 0;
    }
    if (res < 0)
      return -1;
    ret += res;
    /* clamp res so tmp/size stay inside the buffer after truncation */
    if (res >= size)
      res = size>0 ? size - 1 : 0;
    tmp += res;
    size -= res;
  }

  return ret;
}
+
+int hwloc_bitmap_taskset_asprintf(char ** strp, const struct hwloc_bitmap_s * __hwloc_restrict set)
+{
+ int len;
+ char *buf;
+
+ HWLOC__BITMAP_CHECK(set);
+
+ len = hwloc_bitmap_taskset_snprintf(NULL, 0, set);
+ buf = malloc(len+1);
+ *strp = buf;
+ return hwloc_bitmap_taskset_snprintf(buf, len+1, set);
+}
+
/* Parse the taskset(1) single-hex-number format produced by
 * hwloc_bitmap_taskset_snprintf(), with an optional "0xf...f" infinite prefix.
 * Returns 0 on success; on parse failure, zeroes the bitmap and returns -1. */
int hwloc_bitmap_taskset_sscanf(struct hwloc_bitmap_s *set, const char * __hwloc_restrict string)
{
  const char * current = string;
  int chars;     /* hex digits remaining to parse */
  int count;     /* ulongs remaining to fill, highest first */
  int infinite = 0;

  current = string;
  if (!strncmp("0xf...f", current, 7)) {
    /* infinite bitmap */
    infinite = 1;
    current += 7;
    if (*current == '\0') {
      /* special case for infinite/full bitmap */
      hwloc_bitmap_fill(set);
      return 0;
    }
  } else {
    /* finite bitmap */
    if (!strncmp("0x", current, 2))
      current += 2;
    if (*current == '\0') {
      /* special case for empty bitmap */
      hwloc_bitmap_zero(set);
      return 0;
    }
  }
  /* we know there are other characters now */

  chars = strlen(current);
  count = (chars * 4 + HWLOC_BITS_PER_LONG - 1) / HWLOC_BITS_PER_LONG;

  hwloc_bitmap_reset_by_ulongs(set, count);
  set->infinite = 0;

  while (*current != '\0') {
    int tmpchars;
    char ustr[17];
    unsigned long val;
    char *next;

    /* the first (most significant) group may be shorter than a full ulong */
    tmpchars = chars % (HWLOC_BITS_PER_LONG/4);
    if (!tmpchars)
      tmpchars = (HWLOC_BITS_PER_LONG/4);

    /* parse one ulong's worth of hex digits through a bounded copy */
    memcpy(ustr, current, tmpchars);
    ustr[tmpchars] = '\0';
    val = strtoul(ustr, &next, 16);
    if (*next != '\0')
      goto failed;

    set->ulongs[count-1] = val;

    current += tmpchars;
    chars -= tmpchars;
    count--;
  }

  set->infinite = infinite; /* set at the end, to avoid spurious realloc with filled new ulongs */

  return 0;

 failed:
  /* failure to parse */
  hwloc_bitmap_zero(set);
  return -1;
}
+
+static void hwloc_bitmap__zero(struct hwloc_bitmap_s *set)
+{
+ unsigned i;
+ for(i=0; iulongs_count; i++)
+ set->ulongs[i] = HWLOC_SUBBITMAP_ZERO;
+ set->infinite = 0;
+}
+
/* Make the bitmap empty (finite, all bits cleared). */
void hwloc_bitmap_zero(struct hwloc_bitmap_s * set)
{
  HWLOC__BITMAP_CHECK(set);

  /* shrink down to a single ulong before clearing */
  hwloc_bitmap_reset_by_ulongs(set, 1);
  hwloc_bitmap__zero(set);
}
+
+static void hwloc_bitmap__fill(struct hwloc_bitmap_s * set)
+{
+ unsigned i;
+ for(i=0; iulongs_count; i++)
+ set->ulongs[i] = HWLOC_SUBBITMAP_FULL;
+ set->infinite = 1;
+}
+
/* Make the bitmap full (infinite, all bits set). */
void hwloc_bitmap_fill(struct hwloc_bitmap_s * set)
{
  HWLOC__BITMAP_CHECK(set);

  /* shrink down to a single ulong before filling */
  hwloc_bitmap_reset_by_ulongs(set, 1);
  hwloc_bitmap__fill(set);
}
+
+/* Public: reinitialize the bitmap from a single ulong mask.
+ * The result is finite: only bits of the first ulong may be set. */
+void hwloc_bitmap_from_ulong(struct hwloc_bitmap_s *set, unsigned long mask)
+{
+ HWLOC__BITMAP_CHECK(set);
+
+ hwloc_bitmap_reset_by_ulongs(set, 1);
+ set->ulongs[0] = mask; /* there's always at least one ulong allocated */
+ set->infinite = 0;
+}
+
+/* Public: reinitialize the bitmap so that only the i-th ulong is `mask`.
+ * All lower ulongs are zeroed and the bitmap becomes finite. */
+void hwloc_bitmap_from_ith_ulong(struct hwloc_bitmap_s *set, unsigned i, unsigned long mask)
+{
+ unsigned j;
+
+ HWLOC__BITMAP_CHECK(set);
+
+ hwloc_bitmap_reset_by_ulongs(set, i+1);
+ set->ulongs[i] = mask;
+ /* clear everything below the i-th ulong */
+ for(j=0; j<i; j++)
+  set->ulongs[j] = HWLOC_SUBBITMAP_ZERO;
+ set->infinite = 0;
+}
+
+/* Public: return the first ulong of the bitmap. */
+unsigned long hwloc_bitmap_to_ulong(const struct hwloc_bitmap_s *set)
+{
+ HWLOC__BITMAP_CHECK(set);
+
+ return set->ulongs[0]; /* there's always at least one ulong allocated */
+}
+
+/* Public: return the i-th ulong of the bitmap.
+ * HWLOC_SUBBITMAP_READULONG presumably handles indices beyond the
+ * allocated part (returning full/zero depending on the infinite flag)
+ * — defined earlier in this file. */
+unsigned long hwloc_bitmap_to_ith_ulong(const struct hwloc_bitmap_s *set, unsigned i)
+{
+ HWLOC__BITMAP_CHECK(set);
+
+ return HWLOC_SUBBITMAP_READULONG(set, i);
+}
+
+/* Public: reinitialize the bitmap so that only bit `cpu` is set. */
+void hwloc_bitmap_only(struct hwloc_bitmap_s * set, unsigned cpu)
+{
+ unsigned index_ = HWLOC_SUBBITMAP_INDEX(cpu);
+
+ HWLOC__BITMAP_CHECK(set);
+
+ /* resize so the target ulong exists, clear everything, then set the bit */
+ hwloc_bitmap_reset_by_cpu_index(set, cpu);
+ hwloc_bitmap__zero(set);
+ set->ulongs[index_] |= HWLOC_SUBBITMAP_CPU(cpu);
+}
+
+/* Public: reinitialize the bitmap so that every bit EXCEPT `cpu` is set
+ * (infinitely, since hwloc_bitmap__fill raises the infinite flag). */
+void hwloc_bitmap_allbut(struct hwloc_bitmap_s * set, unsigned cpu)
+{
+ unsigned index_ = HWLOC_SUBBITMAP_INDEX(cpu);
+
+ HWLOC__BITMAP_CHECK(set);
+
+ /* resize so the target ulong exists, fill everything, then clear the bit */
+ hwloc_bitmap_reset_by_cpu_index(set, cpu);
+ hwloc_bitmap__fill(set);
+ set->ulongs[index_] &= ~HWLOC_SUBBITMAP_CPU(cpu);
+}
+
+/* Public: set bit `cpu`, growing the bitmap as needed. */
+void hwloc_bitmap_set(struct hwloc_bitmap_s * set, unsigned cpu)
+{
+ unsigned index_ = HWLOC_SUBBITMAP_INDEX(cpu);
+
+ HWLOC__BITMAP_CHECK(set);
+
+ /* nothing to do if setting inside the infinite part of the bitmap */
+ if (set->infinite && cpu >= set->ulongs_count * HWLOC_BITS_PER_LONG)
+ return;
+
+ hwloc_bitmap_realloc_by_cpu_index(set, cpu);
+ set->ulongs[index_] |= HWLOC_SUBBITMAP_CPU(cpu);
+}
+
+/* Public: set all bits in [begincpu, _endcpu], where _endcpu == -1 means
+ * "up to infinity" (the bitmap becomes infinite and only the finite part
+ * needs to be updated). No-op when the range is empty. */
+void hwloc_bitmap_set_range(struct hwloc_bitmap_s * set, unsigned begincpu, int _endcpu)
+{
+ unsigned i;
+ unsigned beginset,endset;
+ unsigned endcpu = (unsigned) _endcpu;
+
+ HWLOC__BITMAP_CHECK(set);
+
+ if (_endcpu == -1) {
+  set->infinite = 1;
+  /* keep endcpu == -1 since this unsigned is actually larger than anything else */
+ }
+
+ if (set->infinite) {
+  /* truncate the range according to the infinite part of the bitmap */
+  if (endcpu >= set->ulongs_count * HWLOC_BITS_PER_LONG)
+   endcpu = set->ulongs_count * HWLOC_BITS_PER_LONG - 1;
+  if (begincpu >= set->ulongs_count * HWLOC_BITS_PER_LONG)
+   return;
+ }
+ if (endcpu < begincpu)
+  return;
+ hwloc_bitmap_realloc_by_cpu_index(set, endcpu);
+
+ beginset = HWLOC_SUBBITMAP_INDEX(begincpu);
+ endset = HWLOC_SUBBITMAP_INDEX(endcpu);
+ /* fill whole ulongs strictly between the boundary ulongs */
+ for(i=beginset+1; i<endset; i++)
+  set->ulongs[i] = HWLOC_SUBBITMAP_FULL;
+ if (beginset == endset) {
+  /* range fits in a single ulong */
+  set->ulongs[beginset] |= HWLOC_SUBBITMAP_ULBIT_FROMTO(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu), HWLOC_SUBBITMAP_CPU_ULBIT(endcpu));
+ } else {
+  /* partial first and last ulongs */
+  set->ulongs[beginset] |= HWLOC_SUBBITMAP_ULBIT_FROM(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu));
+  set->ulongs[endset] |= HWLOC_SUBBITMAP_ULBIT_TO(HWLOC_SUBBITMAP_CPU_ULBIT(endcpu));
+ }
+}
+
+/* Public: overwrite the i-th ulong with `mask`, growing the bitmap
+ * as needed. Other ulongs and the infinite flag are left unchanged. */
+void hwloc_bitmap_set_ith_ulong(struct hwloc_bitmap_s *set, unsigned i, unsigned long mask)
+{
+ HWLOC__BITMAP_CHECK(set);
+
+ hwloc_bitmap_realloc_by_ulongs(set, i+1);
+ set->ulongs[i] = mask;
+}
+
+/* Public: clear bit `cpu`, growing the bitmap if the bit lies in the
+ * infinitely-set part (so that the cleared bit can be represented). */
+void hwloc_bitmap_clr(struct hwloc_bitmap_s * set, unsigned cpu)
+{
+ unsigned index_ = HWLOC_SUBBITMAP_INDEX(cpu);
+
+ HWLOC__BITMAP_CHECK(set);
+
+ /* nothing to do if clearing inside the infinitely-unset part of the bitmap */
+ if (!set->infinite && cpu >= set->ulongs_count * HWLOC_BITS_PER_LONG)
+ return;
+
+ hwloc_bitmap_realloc_by_cpu_index(set, cpu);
+ set->ulongs[index_] &= ~HWLOC_SUBBITMAP_CPU(cpu);
+}
+
+/* Public: clear all bits in [begincpu, _endcpu], where _endcpu == -1 means
+ * "up to infinity" (the infinite flag is dropped and only the finite part
+ * needs to be updated). No-op when the range is empty. */
+void hwloc_bitmap_clr_range(struct hwloc_bitmap_s * set, unsigned begincpu, int _endcpu)
+{
+ unsigned i;
+ unsigned beginset,endset;
+ unsigned endcpu = (unsigned) _endcpu;
+
+ HWLOC__BITMAP_CHECK(set);
+
+ if (_endcpu == -1) {
+  set->infinite = 0;
+  /* keep endcpu == -1 since this unsigned is actually larger than anything else */
+ }
+
+ if (!set->infinite) {
+  /* truncate the range according to the infinitely-unset part of the bitmap */
+  if (endcpu >= set->ulongs_count * HWLOC_BITS_PER_LONG)
+   endcpu = set->ulongs_count * HWLOC_BITS_PER_LONG - 1;
+  if (begincpu >= set->ulongs_count * HWLOC_BITS_PER_LONG)
+   return;
+ }
+ if (endcpu < begincpu)
+  return;
+ hwloc_bitmap_realloc_by_cpu_index(set, endcpu);
+
+ beginset = HWLOC_SUBBITMAP_INDEX(begincpu);
+ endset = HWLOC_SUBBITMAP_INDEX(endcpu);
+ /* zero whole ulongs strictly between the boundary ulongs */
+ for(i=beginset+1; i<endset; i++)
+  set->ulongs[i] = HWLOC_SUBBITMAP_ZERO;
+ if (beginset == endset) {
+  /* range fits in a single ulong */
+  set->ulongs[beginset] &= ~HWLOC_SUBBITMAP_ULBIT_FROMTO(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu), HWLOC_SUBBITMAP_CPU_ULBIT(endcpu));
+ } else {
+  /* partial first and last ulongs */
+  set->ulongs[beginset] &= ~HWLOC_SUBBITMAP_ULBIT_FROM(HWLOC_SUBBITMAP_CPU_ULBIT(begincpu));
+  set->ulongs[endset] &= ~HWLOC_SUBBITMAP_ULBIT_TO(HWLOC_SUBBITMAP_CPU_ULBIT(endcpu));
+ }
+}
+
+/* Public: return 1 if bit `cpu` is set, 0 otherwise.
+ * Uses READULONG so indices beyond the allocated part follow the
+ * infinite flag. */
+int hwloc_bitmap_isset(const struct hwloc_bitmap_s * set, unsigned cpu)
+{
+ unsigned index_ = HWLOC_SUBBITMAP_INDEX(cpu);
+
+ HWLOC__BITMAP_CHECK(set);
+
+ return (HWLOC_SUBBITMAP_READULONG(set, index_) & HWLOC_SUBBITMAP_CPU(cpu)) != 0;
+}
+
+/* Public: return 1 if no bit is set at all, 0 otherwise.
+ * An infinite bitmap is never zero; otherwise every allocated ulong
+ * must be zero. */
+int hwloc_bitmap_iszero(const struct hwloc_bitmap_s *set)
+{
+ unsigned i;
+
+ HWLOC__BITMAP_CHECK(set);
+
+ if (set->infinite)
+  return 0;
+ for(i=0; i<set->ulongs_count; i++)
+  if (set->ulongs[i] != HWLOC_SUBBITMAP_ZERO)
+   return 0;
+ return 1;
+}
+
+/* Public: return 1 if every bit is set, 0 otherwise.
+ * A finite bitmap is never full; otherwise every allocated ulong
+ * must be all-ones. */
+int hwloc_bitmap_isfull(const struct hwloc_bitmap_s *set)
+{
+ unsigned i;
+
+ HWLOC__BITMAP_CHECK(set);
+
+ if (!set->infinite)
+  return 0;
+ for(i=0; i<set->ulongs_count; i++)
+  if (set->ulongs[i] != HWLOC_SUBBITMAP_FULL)
+   return 0;
+ return 1;
+}
+
+/* Public: return 1 if both bitmaps contain exactly the same bits.
+ * Iterates up to the larger of the two allocations; READULONG extends
+ * the shorter one according to its infinite flag. */
+int hwloc_bitmap_isequal (const struct hwloc_bitmap_s *set1, const struct hwloc_bitmap_s *set2)
+{
+ unsigned i;
+
+ HWLOC__BITMAP_CHECK(set1);
+ HWLOC__BITMAP_CHECK(set2);
+
+ for(i=0; i<set1->ulongs_count || i<set2->ulongs_count; i++)
+  if (HWLOC_SUBBITMAP_READULONG(set1, i) != HWLOC_SUBBITMAP_READULONG(set2, i))
+   return 0;
+
+ /* the tails beyond both allocations must agree too */
+ if (set1->infinite != set2->infinite)
+  return 0;
+
+ return 1;
+}
+
+/* Public: return 1 if the two bitmaps share at least one set bit.
+ * BUGFIX: the both-infinite case must return 1, not 0 — two infinite
+ * bitmaps always intersect in their infinite tails (the original code
+ * returned 0 on both paths, making the check dead). */
+int hwloc_bitmap_intersects (const struct hwloc_bitmap_s *set1, const struct hwloc_bitmap_s *set2)
+{
+ unsigned i;
+
+ HWLOC__BITMAP_CHECK(set1);
+ HWLOC__BITMAP_CHECK(set2);
+
+ for(i=0; i<set1->ulongs_count || i<set2->ulongs_count; i++)
+  if ((HWLOC_SUBBITMAP_READULONG(set1, i) & HWLOC_SUBBITMAP_READULONG(set2, i)) != HWLOC_SUBBITMAP_ZERO)
+   return 1;
+
+ if (set1->infinite && set2->infinite)
+  return 1;
+
+ return 0;
+}
+
+/* Public: return 1 if every bit of sub_set is also set in super_set.
+ * Checking sub_set's allocated ulongs suffices: bits of sub_set beyond
+ * its allocation are all-zero unless it is infinite, and that case is
+ * handled by the explicit infinite-flag comparison below. */
+int hwloc_bitmap_isincluded (const struct hwloc_bitmap_s *sub_set, const struct hwloc_bitmap_s *super_set)
+{
+ unsigned i;
+
+ HWLOC__BITMAP_CHECK(sub_set);
+ HWLOC__BITMAP_CHECK(super_set);
+
+ for(i=0; i<sub_set->ulongs_count; i++)
+  if (HWLOC_SUBBITMAP_READULONG(super_set, i) != (HWLOC_SUBBITMAP_READULONG(super_set, i) | HWLOC_SUBBITMAP_READULONG(sub_set, i)))
+   return 0;
+
+ /* an infinite subset cannot fit inside a finite superset */
+ if (sub_set->infinite && !super_set->infinite)
+  return 0;
+
+ return 1;
+}
+
+/* Public: res = set1 | set2. res may alias either input, hence realloc
+ * (which preserves contents) instead of reset. */
+void hwloc_bitmap_or (struct hwloc_bitmap_s *res, const struct hwloc_bitmap_s *set1, const struct hwloc_bitmap_s *set2)
+{
+ const struct hwloc_bitmap_s *largest = set1->ulongs_count > set2->ulongs_count ? set1 : set2;
+ unsigned i;
+
+ HWLOC__BITMAP_CHECK(res);
+ HWLOC__BITMAP_CHECK(set1);
+ HWLOC__BITMAP_CHECK(set2);
+
+ hwloc_bitmap_realloc_by_ulongs(res, largest->ulongs_count); /* cannot reset since the output may also be an input */
+
+ for(i=0; i<largest->ulongs_count; i++)
+  res->ulongs[i] = HWLOC_SUBBITMAP_READULONG(set1, i) | HWLOC_SUBBITMAP_READULONG(set2, i);
+
+ res->infinite = set1->infinite || set2->infinite;
+}
+
+/* Public: res = set1 & set2. res may alias either input, hence realloc
+ * (which preserves contents) instead of reset. */
+void hwloc_bitmap_and (struct hwloc_bitmap_s *res, const struct hwloc_bitmap_s *set1, const struct hwloc_bitmap_s *set2)
+{
+ const struct hwloc_bitmap_s *largest = set1->ulongs_count > set2->ulongs_count ? set1 : set2;
+ unsigned i;
+
+ HWLOC__BITMAP_CHECK(res);
+ HWLOC__BITMAP_CHECK(set1);
+ HWLOC__BITMAP_CHECK(set2);
+
+ hwloc_bitmap_realloc_by_ulongs(res, largest->ulongs_count); /* cannot reset since the output may also be an input */
+
+ for(i=0; i<largest->ulongs_count; i++)
+  res->ulongs[i] = HWLOC_SUBBITMAP_READULONG(set1, i) & HWLOC_SUBBITMAP_READULONG(set2, i);
+
+ res->infinite = set1->infinite && set2->infinite;
+}
+
+/* Public: res = set1 & ~set2. res may alias either input, hence realloc
+ * (which preserves contents) instead of reset. */
+void hwloc_bitmap_andnot (struct hwloc_bitmap_s *res, const struct hwloc_bitmap_s *set1, const struct hwloc_bitmap_s *set2)
+{
+ const struct hwloc_bitmap_s *largest = set1->ulongs_count > set2->ulongs_count ? set1 : set2;
+ unsigned i;
+
+ HWLOC__BITMAP_CHECK(res);
+ HWLOC__BITMAP_CHECK(set1);
+ HWLOC__BITMAP_CHECK(set2);
+
+ hwloc_bitmap_realloc_by_ulongs(res, largest->ulongs_count); /* cannot reset since the output may also be an input */
+
+ for(i=0; i<largest->ulongs_count; i++)
+  res->ulongs[i] = HWLOC_SUBBITMAP_READULONG(set1, i) & ~HWLOC_SUBBITMAP_READULONG(set2, i);
+
+ /* infinite tail remains only if set1's tail is set and set2's is not */
+ res->infinite = set1->infinite && !set2->infinite;
+}
+
+/* Public: res = set1 ^ set2. res may alias either input, hence realloc
+ * (which preserves contents) instead of reset. */
+void hwloc_bitmap_xor (struct hwloc_bitmap_s *res, const struct hwloc_bitmap_s *set1, const struct hwloc_bitmap_s *set2)
+{
+ const struct hwloc_bitmap_s *largest = set1->ulongs_count > set2->ulongs_count ? set1 : set2;
+ unsigned i;
+
+ HWLOC__BITMAP_CHECK(res);
+ HWLOC__BITMAP_CHECK(set1);
+ HWLOC__BITMAP_CHECK(set2);
+
+ hwloc_bitmap_realloc_by_ulongs(res, largest->ulongs_count); /* cannot reset since the output may also be an input */
+
+ for(i=0; i<largest->ulongs_count; i++)
+  res->ulongs[i] = HWLOC_SUBBITMAP_READULONG(set1, i) ^ HWLOC_SUBBITMAP_READULONG(set2, i);
+
+ /* logical XOR of the infinite flags (normalized to 0/1 first) */
+ res->infinite = (!set1->infinite) != (!set2->infinite);
+}
+
+/* Public: res = ~set. res may alias the input, hence realloc
+ * (which preserves contents) instead of reset. */
+void hwloc_bitmap_not (struct hwloc_bitmap_s *res, const struct hwloc_bitmap_s *set)
+{
+ unsigned i;
+
+ HWLOC__BITMAP_CHECK(res);
+ HWLOC__BITMAP_CHECK(set);
+
+ hwloc_bitmap_realloc_by_ulongs(res, set->ulongs_count); /* cannot reset since the output may also be an input */
+
+ for(i=0; i<set->ulongs_count; i++)
+  res->ulongs[i] = ~HWLOC_SUBBITMAP_READULONG(set, i);
+
+ res->infinite = !set->infinite;
+}
+
+/* Public: return the index of the lowest set bit, or -1 if the bitmap
+ * is empty. For an infinite bitmap whose allocated part is all-zero,
+ * the first set bit is the first non-allocated one. */
+int hwloc_bitmap_first(const struct hwloc_bitmap_s * set)
+{
+ unsigned i;
+
+ HWLOC__BITMAP_CHECK(set);
+
+ for(i=0; i<set->ulongs_count; i++) {
+  /* subsets are unsigned longs, use ffsl */
+  unsigned long w = set->ulongs[i];
+  if (w)
+   return hwloc_ffsl(w) - 1 + HWLOC_BITS_PER_LONG*i;
+ }
+
+ if (set->infinite)
+  return set->ulongs_count * HWLOC_BITS_PER_LONG;
+
+ return -1;
+}
+
+/* Public: return the index of the highest set bit, or -1 if the bitmap
+ * is empty OR infinite (an infinite bitmap has no last bit). */
+int hwloc_bitmap_last(const struct hwloc_bitmap_s * set)
+{
+ int i;
+
+ HWLOC__BITMAP_CHECK(set);
+
+ if (set->infinite)
+ return -1;
+
+ /* scan allocated ulongs from the top down */
+ for(i=set->ulongs_count-1; i>=0; i--) {
+ /* subsets are unsigned longs, use flsl */
+ unsigned long w = set->ulongs[i];
+ if (w)
+ return hwloc_flsl(w) - 1 + HWLOC_BITS_PER_LONG*i;
+ }
+
+ return -1;
+}
+
+/* Public: return the index of the first set bit strictly after prev_cpu
+ * (prev_cpu == -1 means "from the beginning"), or -1 if there is none. */
+int hwloc_bitmap_next(const struct hwloc_bitmap_s * set, int prev_cpu)
+{
+ unsigned i = HWLOC_SUBBITMAP_INDEX(prev_cpu + 1);
+
+ HWLOC__BITMAP_CHECK(set);
+
+ if (i >= set->ulongs_count) {
+  /* already past the allocated part: only the infinite tail remains */
+  if (set->infinite)
+   return prev_cpu + 1;
+  else
+   return -1;
+ }
+
+ for(; i<set->ulongs_count; i++) {
+  /* subsets are unsigned longs, use ffsl */
+  unsigned long w = set->ulongs[i];
+
+  /* if the prev cpu is in the same word as the possible next one,
+     we need to mask out previous cpus */
+  if (prev_cpu >= 0 && HWLOC_SUBBITMAP_INDEX((unsigned) prev_cpu) == i)
+   w &= ~HWLOC_SUBBITMAP_ULBIT_TO(HWLOC_SUBBITMAP_CPU_ULBIT(prev_cpu));
+
+  if (w)
+   return hwloc_ffsl(w) - 1 + HWLOC_BITS_PER_LONG*i;
+ }
+
+ if (set->infinite)
+  return set->ulongs_count * HWLOC_BITS_PER_LONG;
+
+ return -1;
+}
+
+/* Public: keep only the lowest set bit, clearing every other one.
+ * If the finite part is empty but the bitmap is infinite, the kept bit
+ * is the first non-allocated one. */
+void hwloc_bitmap_singlify(struct hwloc_bitmap_s * set)
+{
+ unsigned i;
+ int found = 0;
+
+ HWLOC__BITMAP_CHECK(set);
+
+ for(i=0; i<set->ulongs_count; i++) {
+  if (found) {
+   /* a bit was already kept in an earlier ulong: clear the rest */
+   set->ulongs[i] = HWLOC_SUBBITMAP_ZERO;
+   continue;
+  } else {
+   /* subsets are unsigned longs, use ffsl */
+   unsigned long w = set->ulongs[i];
+   if (w) {
+    int _ffs = hwloc_ffsl(w);
+    set->ulongs[i] = HWLOC_SUBBITMAP_CPU(_ffs-1);
+    found = 1;
+   }
+  }
+ }
+
+ if (set->infinite) {
+  if (found) {
+   set->infinite = 0;
+  } else {
+   /* set the first non allocated bit */
+   unsigned first = set->ulongs_count * HWLOC_BITS_PER_LONG;
+   set->infinite = 0; /* do not let realloc fill the newly allocated sets */
+   hwloc_bitmap_set(set, first);
+  }
+ }
+}
+
+/* Public: compare two bitmaps by the index of their lowest set bit.
+ * Returns <0 if set1's first bit is lower, >0 if higher, 0 if equal.
+ * An empty bitmap is considered higher than any non-empty one. */
+int hwloc_bitmap_compare_first(const struct hwloc_bitmap_s * set1, const struct hwloc_bitmap_s * set2)
+{
+ unsigned i;
+
+ HWLOC__BITMAP_CHECK(set1);
+ HWLOC__BITMAP_CHECK(set2);
+
+ for(i=0; i<set1->ulongs_count || i<set2->ulongs_count; i++) {
+  unsigned long w1 = HWLOC_SUBBITMAP_READULONG(set1, i);
+  unsigned long w2 = HWLOC_SUBBITMAP_READULONG(set2, i);
+  if (w1 || w2) {
+   int _ffs1 = hwloc_ffsl(w1);
+   int _ffs2 = hwloc_ffsl(w2);
+   /* if both have a bit set, compare for real */
+   if (_ffs1 && _ffs2)
+    return _ffs1-_ffs2;
+   /* one is empty, and it is considered higher, so reverse-compare them */
+   return _ffs2-_ffs1;
+  }
+ }
+ /* no bit in either allocated part: decide on the infinite tails */
+ if ((!set1->infinite) != (!set2->infinite))
+  return !!set1->infinite - !!set2->infinite;
+ return 0;
+}
+
+/* Public: total order on bitmaps. Infinite bitmaps sort above finite
+ * ones; otherwise compare ulongs from the most significant down.
+ * Returns -1, 0 or 1. */
+int hwloc_bitmap_compare(const struct hwloc_bitmap_s * set1, const struct hwloc_bitmap_s * set2)
+{
+ const struct hwloc_bitmap_s *largest = set1->ulongs_count > set2->ulongs_count ? set1 : set2;
+ int i;
+
+ HWLOC__BITMAP_CHECK(set1);
+ HWLOC__BITMAP_CHECK(set2);
+
+ if ((!set1->infinite) != (!set2->infinite))
+ return !!set1->infinite - !!set2->infinite;
+
+ /* scan from the highest allocated ulong down; READULONG extends the
+  * shorter bitmap according to its infinite flag */
+ for(i=largest->ulongs_count-1; i>=0; i--) {
+ unsigned long val1 = HWLOC_SUBBITMAP_READULONG(set1, (unsigned) i);
+ unsigned long val2 = HWLOC_SUBBITMAP_READULONG(set2, (unsigned) i);
+ if (val1 == val2)
+ continue;
+ return val1 < val2 ? -1 : 1;
+ }
+
+ return 0;
+}
+
+/* Public: return the number of set bits, or -1 if the bitmap is
+ * infinite (infinitely many bits are set). */
+int hwloc_bitmap_weight(const struct hwloc_bitmap_s * set)
+{
+ int weight = 0;
+ unsigned i;
+
+ HWLOC__BITMAP_CHECK(set);
+
+ if (set->infinite)
+  return -1;
+
+ for(i=0; i<set->ulongs_count; i++)
+  weight += hwloc_weight_long(set->ulongs[i]);
+ return weight;
+}
diff --git a/ext/hwloc/src/components.c b/ext/hwloc/src/components.c
new file mode 100644
index 000000000..14112073a
--- /dev/null
+++ b/ext/hwloc/src/components.c
@@ -0,0 +1,746 @@
+/*
+ * Copyright © 2009-2013 Inria. All rights reserved.
+ * Copyright © 2012 Université Bordeaux 1
+ * See COPYING in top-level directory.
+ */
+
+#include
+#include
+#include
+#include
+
+#define HWLOC_COMPONENT_STOP_NAME "stop"
+#define HWLOC_COMPONENT_EXCLUDE_CHAR '-'
+#define HWLOC_COMPONENT_SEPS ","
+
+/* list of all registered discovery components, sorted by priority, higher priority first.
+ * noos is last because its priority is 0.
+ * others' priority is 10.
+ */
+static struct hwloc_disc_component * hwloc_disc_components = NULL;
+
+static unsigned hwloc_components_users = 0; /* first user initializes, last user destroys */
+
+static int hwloc_components_verbose = 0;
+#ifdef HWLOC_HAVE_PLUGINS
+static int hwloc_plugins_verbose = 0;
+#endif
+
+#ifdef HWLOC_WIN_SYS
+/* Basic mutex on top of InterlockedCompareExchange() on windows,
+ * Far from perfect, but easy to maintain, and way enough given that this code will never be needed for real. */
+#include
+static LONG hwloc_components_mutex = 0;
+#define HWLOC_COMPONENTS_LOCK() do { \
+ while (InterlockedCompareExchange(&hwloc_components_mutex, 1, 0) != 0) \
+ SwitchToThread(); \
+} while (0)
+#define HWLOC_COMPONENTS_UNLOCK() do { \
+ assert(hwloc_components_mutex == 1); \
+ hwloc_components_mutex = 0; \
+} while (0)
+
+#elif defined HWLOC_HAVE_PTHREAD_MUTEX
+/* pthread mutex if available (except on windows) */
+#include