diff --git a/AUTHORS.txt b/AUTHORS.txt new file mode 100644 index 00000000..89be811c --- /dev/null +++ b/AUTHORS.txt @@ -0,0 +1,9 @@ +This software was developed at the EPFL Library (ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries), in collaboration with the Scientific Information Division of the University of Geneva (Université de Genève, Division de l’information scientifique). + +Contributors +------------ + +Hugo Gallupo +Alain Borel +Matthias Bräuninger +Pablo Iriarte diff --git a/Dockerfile b/Dockerfile index 71ab7916..7699757f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,113 +1,115 @@ # The first instruction is what image we want to base our container on # We use an official Python runtime as a parent image FROM python:3.9 # The environment variable ensures that the python output is set straight # to the terminal without buffering it first ENV PYTHONUNBUFFERED 1 # Not forcing anybody's hand here, just a bunch of packages that could become useful soon RUN apt-get update && \ apt-get upgrade -y && \ apt-get install -y nginx supervisor sqlite3 mariadb-client rsync && \ pip3 install -U pip setuptools && \ rm -rf /var/lib/apt/lists/* # Set the working directory to /oacct_checker WORKDIR /oacct_checker #RUN chown 1001 /oacct_checker # Copy the current directory contents into the container at /oacct_checker ADD . /oacct_checker/ # Install any needed packages specified in requirements.txt RUN pip install -r requirements.txt # install Node.JS and NPM RUN curl -fsSL https://deb.nodesource.com/setup_16.x | bash - RUN apt-get install -y nodejs # frontend with node js RUN npm install # COPY . /oacct_checker/ #RUN chown -R 1001 /oacct_checker #RUN find /oacct_checker -path "*/node_modules/*" ! 
-path "*/node_modules" -exec chown 1001:0 {} \; RUN chown 1001:0 /oacct_checker RUN chown -R 1001:0 /oacct_checker/static/ /oacct_checker/reactDoc/styleguide # Permissions as per https://docs.openshift.com/container-platform/3.11/creating_images/guidelines.html#openshift-specific-guidelines #RUN chgrp -R 0 /oacct_checker && \ # chmod -R g=u /oacct_checker #RUN find /oacct_checker ! -path "*/node_modules/*" ! -path "*/node_modules" -exec chmod g=u {} \; RUN chmod g=u /oacct_checker RUN chmod -R g=u /oacct_checker/static/ /oacct_checker/reactDoc/styleguide # install uwsgi now because it takes a little while RUN pip3 install uwsgi # set up all the config files COPY conf/supervisord.conf /etc/supervisor/supervisord.conf RUN echo "daemon off;" >> /etc/nginx/nginx.conf COPY conf/nginx-app.conf /etc/nginx/sites-available/default COPY conf/supervisor-app.conf /etc/supervisor/conf.d/ # not really necessary on OpenShift but doesn't hurt COPY conf/ssl/nginx-selfsigned.key /etc/ssl/private/ COPY conf/ssl/nginx-selfsigned.crt /etc/ssl/certs/ COPY conf/ssl/dhparam.pem /etc/ssl/certs/ COPY conf/ssl/self-signed.conf /etc/nginx/snippets COPY conf/ssl/ssl-params.conf /etc/nginx/snippets # Adjust permissions to allow supervisord & nginx logs on OpenShift RUN chmod -R a+w /var/log/ RUN chmod -R a+w /var/run/ RUN chmod a+w /var/lib/nginx/ RUN chmod a+rx /etc/ssl/private/ # comment user directive as master process is run as user in OpenShift anyhow RUN sed -i.bak 's/^user/#user/' /etc/nginx/nginx.conf +RUN sed -i.bak 's/access_log \/var/#access_log \/var/' /etc/nginx/nginx.conf +RUN sed -i.bak 's/error_log \/var/#error_log \/var/' /etc/nginx/nginx.conf # Redirect nginx logs to stdout and stderr to make them accessible on OpenShift #RUN ln -sf /dev/stdout /var/log/nginx/access.log && ln -sf /dev/stderr /var/log/nginx/error.log RUN touch /var/log/nginx/access.log /var/log/nginx/error.log RUN chmod a+r /var/log/nginx/access.log /var/log/nginx/error.log RUN touch /var/log/wsgi.log 
/var/log/wsgi2.log RUN chmod a+rw /var/log/wsgi.log /var/log/wsgi2.log # build app for production RUN npm run build # build styleguide documentation RUN npm run styleguide:build # build Sphinx documentation (just in case...) WORKDIR /oacct_checker/sphinx RUN make html WORKDIR /oacct_checker # Collect static files RUN python manage.py collectstatic --no-input #USER at the end to avoid access errors during the build process USER 1001 #CMD /bin/bash -c 'python3 manage.py runserver 0.0.0.0:8080' CMD supervisord -n -c /etc/supervisor/supervisord.conf # CMD /bin/bash -c 'python3 manage.py collectstatic --noinput && python3 manage.py runserver 0.0.0.0:8080' # test with static files # PermissionError: [Errno 13] Permission denied: '/oacct_checker/staticfiles' # 15.03.2021 new error PermissionError: [Errno 1] Operation not permitted # CMD /bin/bash -c 'python3 manage.py collectstatic --noinput && python3 manage.py runserver 0.0.0.0:8080' #test with npm run build --> npm not found diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 00000000..be3f7b28 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,661 @@ + GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. 
+ + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. 
+ + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. 
+ + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. 
+ + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. 
+ + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. 
+ + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code.
There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +<https://www.gnu.org/licenses/>. diff --git a/assets/docs/installation.md b/assets/docs/installation.md index 19bc3c5f..fa63c4e5 100644 --- a/assets/docs/installation.md +++ b/assets/docs/installation.md @@ -1,26 +1,26 @@ ### Run docker app -1. Build the docker image (while in the application main directory): -``` -docker build -t oacct_image . -``` +1. Build the Docker image (while in the application main directory): + + `docker build -t oacct_image .` 2. Build and run the container for the first time: -``` -docker run -dp 8080:8080 --name oacct oacct_image -``` -The application will be available at http://127.0.0.1:8000 + + `docker run -dp 8080:8080 --name oacct oacct_image + ` + + The application will be available at http://127.0.0.1:8080 3. If you are using a fresh container, you will probably want to create an admin account of your choice inside the container: -``` -docker exec -ti oacct sh -c "python manage.py createsuperuser" -``` + `docker exec -ti oacct sh -c "python manage.py createsuperuser" + ` + 4. Stop the container if you no longer need to keep it in memory: -``` -docker stop oacct -``` + + `docker stop oacct + ` 5. If you want to start the existing container later: -``` -docker start oacct -``` + + `docker start oacct + ` diff --git a/assets/docs/introduction.md b/assets/docs/introduction.md index 1b1600c9..a140bfb6 100644 --- a/assets/docs/introduction.md +++ b/assets/docs/introduction.md @@ -1,7 +1,9 @@ -## Getting Started with OACCT project +## Getting Started with the OACCT project -This Project is using Django and React on hybrid architecture. +Note: the current name of the application is OACT.
The second C is reserved for future use. + +This Project is using Django and React in a hybrid architecture. ![How JavaScript React and Django are integrated!](https://www.saaspegasus.com/static/images/web/modern-javascript/js-pipeline-with-django.png "Django Hybrid design") source: [How to build a React application in a Django project](https://www.saaspegasus.com/guides/modern-javascript-for-django-developers/integrating-django-react/) \ No newline at end of file diff --git a/assets/src/App.js b/assets/src/App.js index 378ae533..16aa8b05 100644 --- a/assets/src/App.js +++ b/assets/src/App.js @@ -1,81 +1,98 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +<https://www.gnu.org/licenses/>. + +*/ + import React from "react"; import "./App.css" import SearchFilterFields from './pages/SearchFilterFields' import Footer from './components/layout/Footer' import About from "./pages/About" import Help from "./pages/Help" import Noresult from "./pages/Noresult" import {Switch, Route } from "react-router-dom" import Box from '@material-ui/core/Box' import MenuAppBar from "./components/layout/MenuAppBar" import { makeStyles } from '@material-ui/core/styles'; const useStyles = makeStyles((theme) => ({ root: { flexGrow: 1, }, })); function App () { const classes = useStyles(); return(
{ window.location.href = '/api/'; return null; }}/> { window.location.href = '/api/openapi'; return null; }}/> { window.location.href = '/admin/'; return null; }}/> { window.location.href = '/styleguide/index.html'; return null; }}/> { window.location.href = '/sphinx/index.html'; return null; }}/>
) } export default App \ No newline at end of file diff --git a/assets/src/App.test.js b/assets/src/App.test.js index 1f03afee..4a7d7746 100644 --- a/assets/src/App.test.js +++ b/assets/src/App.test.js @@ -1,8 +1,25 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +<https://www.gnu.org/licenses/>.
+ +*/ + import { render, screen } from '@testing-library/react'; import App from './App'; test('renders learn react link', () => { render(); const linkElement = screen.getByText(/learn react/i); expect(linkElement).toBeInTheDocument(); }); diff --git a/assets/src/ContextProvider.js b/assets/src/ContextProvider.js index 59bc99bb..861b7f07 100644 --- a/assets/src/ContextProvider.js +++ b/assets/src/ContextProvider.js @@ -1,146 +1,163 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. 
+ +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. + +*/ + import React, { useCallback, useState, useEffect } from "react" import { getListOfInstitution } from './services/requests/Institution' import { getListOfJournal } from "./services/requests/Journal" import { getListOfFunder } from "./services/requests/Funder" const Context = React.createContext() function ContextProvider({ children }) { //call the custom hook for listing field with api const [institList, setInstitList] = useState([]); const [journalList, setJournalList] = useState([]); const [funderList, setFunderList] = useState([]); const [institId, setInstitId] = React.useState(''); const [institName, setInstitName] = React.useState(''); const [journalId, setJournalId] = React.useState(''); const [journalName, setJournalName] = React.useState(''); const [funderId, setFunderId] = React.useState(''); const [funderName, setFunderName] = React.useState(''); const [url, setUrl] = React.useState(''); const [refList, setRefList] = useState([]); const getInstitListFromApi = useCallback(async () => { try { const response = await getListOfInstitution() setInstitList(response.data) } catch (error) { console.log(`error 700 from Get Institution- ${error.message}`) } }, []) const getJournalListFromApi = useCallback(async () => { try { const response = await getListOfJournal() /* patch journal titles for inactive journals, to avoid confusions with continuing titles */ response.data.forEach(element => { if (element.end_year != 9999) { element.name += ' [' + element.starting_year.toString()+ '-' + element.end_year.toString() + ']' } }); setJournalList(response.data) } catch (error) { console.log(`error 700 from Get Journal- ${error.message}`) } }, []) const getFunderListFromApi = useCallback(async () => { try { const response = await getListOfFunder() setFunderList(response.data) } catch (error) { console.log(`error 700 from Get Funder- ${error.message}`) } 
}, []) useEffect(() => { getInstitListFromApi(), getJournalListFromApi(), getFunderListFromApi() }, []) //get the institution Id and update request array function getSelectedInstitId(name) { const updateArr = institList.map(item => { if (item.name === name) { console.log(item.id, item.name) const newItem = item.id const newItemName = item.name setInstitId(newItem) setInstitName(newItemName) return newItem } // return }) // return } //get the journal Id and update request array function getSelectedJournalId(name) { const updateArr = journalList.map(item => { if (item.name === name) { console.log(item.id, item.name) const newItem = item.id const newItemName = item.name setJournalId(newItem) setJournalName(newItemName) return } return }) return } //get the funder Id and update request array function getSelectedFunderId(name) { const updateArr = funderList.map(item => { if (item.name === name) { console.log(item.id, item.name) const newItem = item.id const newItemName = item.name setFunderId(newItem) setFunderName(newItemName) return } return }) return } return ( {children} ) } export { ContextProvider, Context } \ No newline at end of file diff --git a/assets/src/components/DetailCard.js b/assets/src/components/DetailCard.js index c9004e8d..35b7efee 100644 --- a/assets/src/components/DetailCard.js +++ b/assets/src/components/DetailCard.js @@ -1,373 +1,393 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. 
Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +<https://www.gnu.org/licenses/>.
+ +*/ + import React, {useContext,useEffect} from "react" import {Context} from "../ContextProvider" import { makeStyles } from '@material-ui/core/styles'; import "./detailcard.css" import Typography from '@material-ui/core/Typography'; import Card from '@material-ui/core/Card'; import CardActions from '@material-ui/core/CardActions'; import CardContent from '@material-ui/core/CardContent'; import Button from '@material-ui/core/Button'; import { HiLink } from "react-icons/hi"; import DoneIcon from '@material-ui/icons/Done'; import WarningIcon from '@material-ui/icons/Warning'; import "./termcard.css" import { BsUnlock } from "react-icons/bs"; import { GrDiamond } from "react-icons/gr"; import Chip from '@material-ui/core/Chip'; import ClearSharpIcon from '@material-ui/icons/ClearSharp'; /** * General component description in JSDoc format. Markdown is *supported*. */ const useStyles = makeStyles((theme) => ({ root: { '& > *': { margin: theme.spacing(1), display: 'grid', }, flexGrow: 1, }, formControl: { margin: theme.spacing(1), width: 200, }, selectEmpty: { marginTop: theme.spacing(1), }, chip: { margin: 0.5, }, heading: { fontSize: theme.typography.pxToRem(15), fontWeight: theme.typography.fontWeightRegular, }, })); export default function DetailCard({details}) { /** To manage display for journal.end_year --> Inactive*/ const year = new Date().getFullYear() console.log(year) const { setUrl } = useContext(Context) const classes = useStyles(); useEffect(() => { setUrl(window.location.href) },) if (details !== 'null') { return (
{details.end_year && details.end_year < year && Inactive! {details.starting_year &&

From {details.starting_year} to {details.end_year}

}
}

{details.name}

    {details.issn && details.issn.map(item => { return ( item.issn_type === "1" ?
  • Print ISSN: {item.issn}
  • : item.issn_type === "2" ?
  • Electronic ISSN: {item.issn}
  • : item.issn_type === "3" ?
  • Other ISSN: {item.issn}
  • :null ) })}
{details.publisher && details.publisher.map(item => { return
{item.oa_policies ? : null }
})}
{details.country && details.country.map(item => { return
{item.name}
; })} {!details.end_year && (details.starting_year != 0) &&
Since {details.starting_year}
}
{details.oa_status ?
{details.oa_status.status !== "UNKNOWN" ?
{ details.oa_status.status === "Gold" ?

Open Access Status:

: details.oa_status.status === "Diamond" ? <> Open Access Status: {/* {details.oa_status.status} */} :details.oa_status.status === "Full" ?

Open Access Status:

:details.oa_status.status === "Hybrid" ?

Open Access Status:

:details.oa_status.status === "Green" ?

Open Access Status:

:details.oa_status.status === "none" ?

Open Access Status:

:null }
:null}
:null}
{details.language && details.language.map(item =>(

Language: {item.name}

)) } {details.doaj_seal &&
} {details.doaj_status && details.issn[0] &&
} {details.lockss &&
} {details.portico && details.issn[0] &&
} {details.nlch &&
} - {details.qoam_av_score && -
-
- -
+ {details.qoam_av_score && details.issn.map(some_issn => ( + (some_issn.issn_type == '2') && +
+
+ +
+ )) + } {/* ROR and FUNDRED not needed in the current version */} {/* {details.ror ?
:null} {details.fundref ?
:null} } } */}
{details.website ? : } {details.oa_options ? :null} {details.ir_name ?
:null}
) } else { return null } } \ No newline at end of file diff --git a/assets/src/components/ResultCard.js b/assets/src/components/ResultCard.js index f9c74fc8..0dde81f8 100644 --- a/assets/src/components/ResultCard.js +++ b/assets/src/components/ResultCard.js @@ -1,170 +1,190 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. 
+ +*/ + import React from "react" import { makeStyles } from '@material-ui/core/styles'; import "./ResultCard.css" import TermCard from "../components/TermCard" import Accordion from '@material-ui/core/Accordion'; import AccordionSummary from '@material-ui/core/AccordionSummary'; import AccordionDetails from '@material-ui/core/AccordionDetails'; import Typography from '@material-ui/core/Typography'; import ExpandMoreIcon from '@material-ui/icons/ExpandMore'; import Badge from '@material-ui/core/Badge'; import { HiOutlineDocumentDuplicate } from "react-icons/hi"; import { HiOutlineDocument } from "react-icons/hi"; import Grid from '@material-ui/core/Grid' import Box from '@material-ui/core/Box' import Container from '@material-ui/core/Container'; const useStyles = makeStyles((theme) =>({ root: { flexGrow: 1, }, chip: { margin: 0.5, }, })) export default function ResultCard({result}) { const classes = useStyles(); //create one array to organize the frontend output const termresult = [] const termArray = result?.map(i=>( // get condition details i.term?.map(j =>( // termresult.push([j, i.id, i.comment, i.condition_type]) termresult.push([j, [i.id, i.comment, i.condition_type, i.source]]) )) )) //groupyBy array function groupBy(objectArray, property) { // console.log(objectArray) return objectArray.reduce((acc, obj) => { // console.log(obj[0]) - const key = obj[0][property][0].description; - - if (!acc[key]) { + // Modified from original version as we use it to group by a property that is part of an array + for (const repeatable_property of obj[0][property]) { + const key = repeatable_property.description; + if (!acc[key]) { acc[key] = []; - } - // Add object to list for given key's value - acc[key].push(obj); - return acc; + } + + // Add object to list for given key's value + acc[key].push(obj); + } + return acc; }, {}); } const groupedTerm = groupBy(termresult, 'version') //first version // console.log(groupedTerm[1]) //convert object into array const termItem = 
Object.entries(groupedTerm) //manage the display order function orderVersion (version) { if (version[0] ==="Submitted version") { version.unshift(3) } else if (version[0] === "Published version") { version.unshift(1) } else if (version[0] === "Accepted version") { version.unshift(2) } } //apply the function for each version termItem?.map(i=>( orderVersion(i) )) termItem.sort() console.log(termItem) const displayVersion = termItem?.map(item =>(
} aria-controls="panel1a-content" id="panel1a-header" > {item?.map(i => ( {typeof i === "string" && i} {typeof i === "object" && i.length > 1 ? : typeof i === "object" && } )) } {item?.map(j => ( typeof j === "object" && j?.map(k => ( )) )) }
)) return ( //level 0
{displayVersion}
) } \ No newline at end of file diff --git a/assets/src/components/TermCard.js b/assets/src/components/TermCard.js index a667d0ee..ec537b2b 100644 --- a/assets/src/components/TermCard.js +++ b/assets/src/components/TermCard.js @@ -1,305 +1,323 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +<https://www.gnu.org/licenses/>.
+ +*/ + import React, {useState, useContext, useEffect} from "react" import {Context} from "../ContextProvider" import { makeStyles } from '@material-ui/core/styles'; import "./termcard.css" import Chip from '@material-ui/core/Chip'; import DoneIcon from '@material-ui/icons/Done'; import HighlightOffIcon from '@material-ui/icons/HighlightOff'; import Typography from '@material-ui/core/Typography'; import DateRangeIcon from '@material-ui/icons/DateRange'; import { FaCoins, FaRegHandshake, FaUserGraduate } from 'react-icons/fa'; import { RiFilePaper2Line } from 'react-icons/ri'; import Button from '@material-ui/core/Button'; import { HiLink } from "react-icons/hi"; import { GrInfo } from "react-icons/gr"; import Container from '@material-ui/core/Container' import Grid from '@material-ui/core/Grid' import Card from '@material-ui/core/Card'; import CardActions from '@material-ui/core/CardActions'; import CardContent from '@material-ui/core/CardContent'; import Tooltip from '@material-ui/core/Tooltip'; import { BsNewspaper } from 'react-icons/bs'; import { FiFlag } from 'react-icons/fi'; const useStyles = makeStyles((theme) =>({ card: { width: '100%', marginTop: "1rem", textAlign:'left' }, root: { flexGrow: 1, textAlign:'left', }, chip: { margin: 0.5, }, heading: { fontSize: theme.typography.pxToRem(15), fontWeight: theme.typography.fontWeightRegular, }, })) const IDConditionTypeJournal = 3; const IDConditionTypeOrganization = 1; const IDConditionTypeAgreement = 2; export default function TermCard({term}) { const classes = useStyles(); //call the custom hook to share the state between different level componant const {url} = useContext(Context) const [ref, setRef] = useState("") console.log(ref) useEffect(() => { // ## Update ref term by creating a specific Reference based on the Condition set ID = C and Term.id = T setRef( term.map( j=> ( j[0] && `C${j[0]}/T${term[0].id}` )) ) },[]); // render once function handleClick () { // ## Create mail template to report a 
modification, contain the actual Url and the reference Term Card - window.open(`mailto:publishsupport@epfl.ch?subject= OACCT Modification request for ${encodeURIComponent(url)} Term Card Reference: ${ref[1]} &body=Request Description:`) + window.open(`mailto:publishsupport@epfl.ch?subject= OACT Modification request for ${encodeURIComponent(url)} Term Card Reference: ${ref[1]} &body=Request Description:`) } console.log(`cost factor data: ${term[0].cost_factor[0].cost_factor_type.name}`) const licenceIcon = term[0].licence?.map(i=>( <> { (i.name_or_abbrev != 'UNKNOWN') ? : } )) const cost = term[0].cost_factor?.map( i => ( <> { (i.cost_factor_type.name != 'UNKNOWN') ? : null } )) const termArchive = term[0].ir_archiving && term[0].ir_archiving ? ( } label={"Upload to institutional repository"} // variant="outlined" // clickable={handleClick} // color="secondary" // onDelete={handleDelete} style={{ background: "#DAF7A6"}} // title="This is more information" /> ): ( } label="Upload to institutional repository" // clickable={handleClick} // color="secondary" // onDelete={handleDelete} style={{ background: "#f50057"}} // title="This is more information" /> ) - const embargo = ( + const embargo = ( term[0].embargo_months != null ? } - label={"Embargo: " + term[0].embargo_months + " Month(s)"} + label={ ("Embargo: " + term[0].embargo_months + " Month(s)") } // variant="outlined" // clickable={handleClick} // color="secondary" // onDelete={handleDelete} style={{ background: "#FFFFFF"}} // title="This is more information" /> + : null ) return (
{term.map( j=> ( j[0] && <>

{j[2].id === IDConditionTypeJournal && } {j[2].id === IDConditionTypeAgreement && } {j[2].id === IDConditionTypeOrganization && } {j[2].condition_issuer} {j[1] ?

{j[1]}
: ''}

{ j[3] && }

)) }
{/* filter Unknow cost factor type to not display */} {cost} {licenceIcon} {termArchive} {embargo}
{term[0].comment ?

{term[0].comment}

:null}
{term.map( j=> ( j[0] && C{j[0]}/T{term[0].id} )) }
{term[0].source ? :null}
) } diff --git a/assets/src/components/layout/Footer.js b/assets/src/components/layout/Footer.js index 38a25ff9..859d95f2 100644 --- a/assets/src/components/layout/Footer.js +++ b/assets/src/components/layout/Footer.js @@ -1,67 +1,88 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. 
+ +*/ + import React from "react" import "./FooterStyles.css" import Container from '@material-ui/core/Container' import Grid from '@material-ui/core/Grid' import Box from '@material-ui/core/Box' import Link from '@material-ui/core/Link' /** * About Page Information. * * @version 0.0.1 * @author [George brune](https://github.com/sapegin) * @author [Andy Krings-Stern](https://github.com/ankri) * @see http://github.com */ export default function Footer () { return (
Sponsor - Home - swissuniversities - - - P5 - scientific-information + swissuniversities - OACCT Creators + OACT Creators Ecole polytechnique fédérale de Lausanne (EPFL) Université de Genève - Contact + Contact - Email us + Email us - Open Access Compliance Check Tool ® {new Date().getFullYear()} | Test version + Open Access Check Tool ® {new Date().getFullYear()} EPFL-UNIGE
+ This application is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + Source code is available from our Git repository on C4science. + Application data is distributed under the CC BY-NC-SA 4.0 license. +
) } diff --git a/assets/src/components/layout/MenuAppBar.js b/assets/src/components/layout/MenuAppBar.js index 8e5ac5f6..089ae141 100644 --- a/assets/src/components/layout/MenuAppBar.js +++ b/assets/src/components/layout/MenuAppBar.js @@ -1,145 +1,162 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. 
+ +*/ + import React from 'react'; import { makeStyles } from '@material-ui/core/styles'; import AppBar from '@material-ui/core/AppBar'; import Toolbar from '@material-ui/core/Toolbar'; import Typography from '@material-ui/core/Typography'; import IconButton from '@material-ui/core/IconButton'; import MenuIcon from '@material-ui/icons/Menu'; import AccountCircle from '@material-ui/icons/AccountCircle'; import Switch from '@material-ui/core/Switch'; import FormControlLabel from '@material-ui/core/FormControlLabel'; import FormGroup from '@material-ui/core/FormGroup'; import MenuItem from '@material-ui/core/MenuItem'; import Menu from '@material-ui/core/Menu'; import {Link} from "react-router-dom" import "./header.css" import Logo from './logo.svg' import Container from '@material-ui/core/Container'; const useStyles = makeStyles((theme) => ({ root: { flexGrow: 1, }, menuButton: { marginRight: theme.spacing(2), }, title: { flexGrow: 1, }, })); export default function MenuAppBar() { const classes = useStyles(); const [auth, setAuth] = React.useState(false); const [anchorEl, setAnchorEl] = React.useState(null); const open = Boolean(anchorEl); const handleChange = (event) => { setAuth(event.target.checked); }; const handleMenu = (event) => { setAnchorEl(event.currentTarget); }; const handleClose = () => { setAnchorEl(null); }; return (
{/* } label={auth ? 'Logout' : 'Login'} /> */} Check Tool API Login Help About Front-end Documentation Back-end Documentation Open API Documentation {/* Documentation */} {auth && (
Admin My account
)}
); } \ No newline at end of file diff --git a/assets/src/components/layout/NavBar.js b/assets/src/components/layout/NavBar.js index 037669c7..591c07a3 100644 --- a/assets/src/components/layout/NavBar.js +++ b/assets/src/components/layout/NavBar.js @@ -1,32 +1,49 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. 
+ +*/ + import React from 'react' import AppBar from '@material-ui/core/AppBar' import Toolbar from '@material-ui/core/Toolbar' import Typography from '@material-ui/core/Typography' import Logo from './logo.svg' import {Link} from "react-router-dom" export default function NavBar() { return ( ) } diff --git a/assets/src/http-common.js b/assets/src/http-common.js index 5b11f3eb..741b2920 100644 --- a/assets/src/http-common.js +++ b/assets/src/http-common.js @@ -1,8 +1,25 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. 
+ +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. + +*/ + import axios from "axios"; export default axios.create({ baseURL: "http://localhost:8080/api", headers: { "Content-type": "application/json" } }); \ No newline at end of file diff --git a/assets/src/index.js b/assets/src/index.js index 48512b1f..17af1c27 100644 --- a/assets/src/index.js +++ b/assets/src/index.js @@ -1,24 +1,41 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. 
+ +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. + +*/ + import React from "react" import ReactDOM from "react-dom"; import App from "./App"; import "core-js/stable"; import "regenerator-runtime/runtime"; import "./index.css" import {HashRouter as Router } from "react-router-dom" import {ContextProvider} from "./ContextProvider" import { QueryClientProvider, QueryClient } from 'react-query' import { ReactQueryDevtools } from 'react-query/devtools' const queryClient = new QueryClient() ReactDOM.render( , document.getElementById('app') ) \ No newline at end of file diff --git a/assets/src/pages/About.js b/assets/src/pages/About.js index dd361d15..05a76015 100644 --- a/assets/src/pages/About.js +++ b/assets/src/pages/About.js @@ -1,39 +1,62 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. 
+ +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. + +*/ + import React from "react" import "./about.css" /** * About Page Information. * * @version 0.0.1 * @author [George brune](https://github.com/sapegin) * @author [Andy Krings-Stern](https://github.com/ankri) * @see http://github.com */ -export default function About () { +export default function About() { return ( -
-

OACCT – About (draft!)

- -
- The OACCT (Open Access Compliance Check Tool) is an online resource, tailored to the Swiss academic community's needs, that gathers the most important information concerning Open-Access publishing. Its principal goal is to guide Swiss researchers in deciding where and how to publish their works in compliance with funders’ and institutional Open Access policies. OACCT provides a list of journals with information aggregated from several sources on a regular basis: -
    -
  • Journal ISSNs (source: ISSN International centre)
  • -
  • Publication conditions (source: Sherpa/Romeo)
  • -
  • Swiss institutions from swissuniversities
  • -
  • Funders from…
  • -
  • To be completed
  • -
-
- -

Data reuse & licence

-
- Please see our terms of use to learn how the data provided by our service may be reused. -
- -

Financing

-
- The OACCT project was co-financed by swissuniversities within the P5 Program “Scientific information: Access, processing and safeguarding” and developed by the university libraries of EPFL and Université de Genève with the support of the Universitätsbibliothek Bern and Université de Lausanne -
-
+
+

About

+
+
+ The OACT (Open Access Check Tool) is an online resource, tailored to the Swiss academic community's needs, + that gathers the most important information concerning Open-Access publishing. Its principal goal is to guide Swiss + researchers in deciding where and how to publish their works in compliance with funders’ and institutional Open Access + policies. OACT provides a list of journals with information aggregated from several sources on a regular basis: + + +
+ +

Data reuse

+ +
Please see our terms of use to learn how the data provided by our service may be reused.
+ +

Financing

+ +
The OACT project was co-financed by swissuniversities within the P5 Program + “Scientific information: Access, processing and safeguarding” and developed by the university libraries + of EPFL and Université de Genève with the support of  + Universitätsbibliothek Bern. +
+
+
) -} +} \ No newline at end of file diff --git a/assets/src/pages/Help.js b/assets/src/pages/Help.js index e897eddc..dd885023 100644 --- a/assets/src/pages/Help.js +++ b/assets/src/pages/Help.js @@ -1,29 +1,107 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. + +*/ + import React from "react" import "./help.css" export default function Help() { return ( -
-

OACCT – Help (draft!)

+
+

Help

+
+

How to use the tool

-
- A database search can be performed by using three search boxes (Institution, funder and journal). The search results contain the following information: -
    -
  • General information about the selected journal with a link to the journal’s publication conditions
  • -
  • APC discount or information regarding a specific deal for Gold OA
  • -
  • Information about Green OA conditions (source: Sherpa/Romeo)
  • -
  • QOAM score
  • -
  • To be completed
  • - + +
    A database search can be performed by using three search boxes (Institution, funder and journal). Once you click on "Check", the results are displayed below the search fields. + +

    Selected options

    + +
    This field contains a brief summary of your selected options (institution, funder and/or journal) in the form of cards.
    + +
      +
    • Swiss institution +
        +
      • Name
      • +
      • Founding year
      • +
      • Website
      • +
      • Link to the institutional repository (if available)
      • + < /ul> + +
      • Funder +
          +
        • Name
        • +
        • Country
        • +
        • Website
        • +
        +
      • +
      • Journal +
          +
        • Title
        • +
        • ISSN
        • +
        • Link to the journal or publisher's website
        • +
        • Open Access status
        • +
        • Language(s)
        • +
        • DOAJ/LOCKSS/PORTICO information
        • +
        • QOAM score
        • +
        +
      -
    -
    - API info to be added (maybe on a different page) -
    -

    Data reuse & licence

    -
    - Please see our terms of use to obtain information about how the data provided by this service may be reused. -
    +

    Search results

    + +
    The search results provide an overview of the costs and main benefits of publishing in a given journal + or by making use of a publishing agreement. They come in the form of institutions’ OA policies, + journals’ publishing policies and publishing agreements and are grouped by the version + of a publication they concern (submitted/preprint, accepted/postprint or published/final). + The type and number of search results depend on the choice of the search fields: + Choosing both an institution and a journal will show the institution’s OA policy alongside + the journal’s publishing policy condition (and possibly existing publishing agreements), + which facilitates comparing them. + Visually, the condition sets are represented by cards as well and contain the following information: +
    + +
      +
    • Type of condition set (institutions’ OA policies, journals' publishing policies and publishing agreements)
    • +
    • A set of four conditions, consisting of +
        +
      • Cost factors: Article processing charge (APC), APC discount or refund
      • +
      • The licence under which the publication appears
      • +
      • Indicator whether it is allowed to archive the publication in the authors' institutional repositories
      • +
      • Embargo period
      • +
      +
    • +
    • Additional conditions defined by a publisher, funder or institution (free text)
    • +
    • The term card reference number (Cxxxx/Tyyyy)
    • +
    • A "Modification request" button that allows notifying the platform's administrators of errors in the corresponding term card.
    • +
    +
+ +

API access

+ +
Please see the back-end documentation and the API schema for further information.
+ +

Data reuse and licence

+ +
+ +
CC BY-NC-SA logo + The content of this web site and the data provided through the API are distributed under the  + Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0) license. + +
) } diff --git a/assets/src/pages/Noresult.js b/assets/src/pages/Noresult.js index 6c77effe..109239b5 100644 --- a/assets/src/pages/Noresult.js +++ b/assets/src/pages/Noresult.js @@ -1,24 +1,41 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. + +*/ + import React from "react" import "./noresult.css" export default function Noresult () { return (

Oops!

404 - The Page can't be found

Go TO Homepage
) } diff --git a/assets/src/pages/SearchFilterFields.css b/assets/src/pages/SearchFilterFields.css index 8eb8360c..bcb46368 100644 --- a/assets/src/pages/SearchFilterFields.css +++ b/assets/src/pages/SearchFilterFields.css @@ -1,49 +1,53 @@ .form-input { margin-bottom: 1rem !important; } - .App-btn { + +.App-btn { background-color: #3771C8 !important; color: white !important; width: 99% ; } +.field-comment { + font-size: .75em +} .App-btn:hover { background-color: #D40000; } @media only screen and (min-width: 768px) { .form-input { margin-right: 1rem !important; } .App-btn { width: 99%; background-color: #3771C8 !important; color: white !important; } .App-btn:hover { background-color: #D40000 !important; } } @media only screen and (min-width: 1024px) { .form-input { margin-right: 1rem !important; } .App-btn { width: 99%; background-color: #3771C8 ; color: white; bottom: -5px; } .App-btn:hover { background-color: #D40000; } } \ No newline at end of file diff --git a/assets/src/pages/SearchFilterFields.js b/assets/src/pages/SearchFilterFields.js index 121b0b2d..3f35b7a9 100644 --- a/assets/src/pages/SearchFilterFields.js +++ b/assets/src/pages/SearchFilterFields.js @@ -1,1227 +1,1276 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. 
The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. + +*/ + import React, {useContext, useState, useEffect} from 'react'; import "./SearchFilterFields.css" import { makeStyles } from '@material-ui/core/styles'; import Button from '@material-ui/core/Button'; import FormControl from '@material-ui/core/FormControl'; import TextField from '@material-ui/core/TextField'; import Autocomplete from '@material-ui/lab/Autocomplete'; import { searchCondi, searchorganizationonly, searchjournalonly, searchInstitFunder, searchCondi3 } from '../services/requests/Condition' import {getJournal} from '../services/requests/Journal' import {getFunder} from '../services/requests/Funder' import {getInstitution} from '../services/requests/Institution' import Accordion from '@material-ui/core/Accordion'; import AccordionSummary from '@material-ui/core/AccordionSummary'; import ExpandMoreIcon from '@material-ui/icons/ExpandMore'; import Typography from '@material-ui/core/Typography'; import AccordionDetails from '@material-ui/core/AccordionDetails'; import Grid 
from '@material-ui/core/Grid' import Box from '@material-ui/core/Box' import Container from '@material-ui/core/Container'; import {Context} from "../ContextProvider" import ResultCard from "../components/ResultCard" import DetailCard from "../components/DetailCard" import CircularProgress from '@material-ui/core/CircularProgress' import Fab from '@material-ui/core/Fab' import ShareIcon from '@material-ui/icons/Share' import Dialog from '@material-ui/core/Dialog'; import DialogActions from '@material-ui/core/DialogActions'; import DialogContent from '@material-ui/core/DialogContent'; import DialogContentText from '@material-ui/core/DialogContentText'; import DialogTitle from '@material-ui/core/DialogTitle'; import Slide from '@material-ui/core/Slide'; import Welcome from './welcome'; import { useHistory,useLocation } from "react-router-dom"; import PropTypes from 'prop-types'; // import { FiFlag } from 'react-icons/fi'; import FlagOutlinedIcon from '@material-ui/icons/FlagOutlined'; import Tooltip from '@material-ui/core/Tooltip'; function useQuery() { return new URLSearchParams(useLocation().search); } const Transition = React.forwardRef(function Transition(props, ref) { return ; }); // ID of condition type that must be excluded in some API requests const j_only_id = 3 const o_only_id = 1 const useStyles = makeStyles((theme) => ({ root: { flexGrow: 1, }, chip: { margin: 0.5, }, })); /** - * Contain the main logic of OACCT tools to filter and send the appropriate request. + * Contain the main logic of OACT tools to filter and send the appropriate request. 
* @version 0.0.1 * @author [Hugo Galuppo](https://github.com/hgpulse) */ export default function SearchFilterFields() { /** Access to URL parameter */ const history = useHistory(); console.log(history) let query = useQuery() //state that allow to hide or show the share url button const [open, setOpen] = React.useState(false) const classes = useStyles(); //call the custom hook to share the state between different level componant const { getSelectedInstitId, getSelectedJournalId, getSelectedFunderId, institList, journalList, funderList, institId, journalId, funderId, setInstitId, setJournalId, setFunderId, setUrl, url } = useContext(Context) //responses const [conditions, setConditions] = useState([]); const [details, setDetails] = useState([]); const [result, updateResult] = useState([]); //Manage the loading state to hide or show the spinner in the search bar const [loading, setLoading] = useState(false); // const [url, setUrl] = useState(window.location.href); useEffect(() => { setDetails('null') setUrl(window.location.href) //handle Url param console.log(history) if (history.location.pathname === "/check") { console.log("this an url to check") setDetails('fromUrl') // alert(query.get("institution")) if (query.get("institution") && !query.get("funder") && !query.get("journal")){ //get organizations conditions // alert(`get api organization Condition only: ${institId}`) //condtion type is not journal only = 1 // Get the user const sendSearchInstitOnly = async () => { try { const resp = await searchorganizationonly(query.get("institution"), j_only_id) console.log(resp.data) setConditions(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } setLoading(false) } console.log(details) const sendGetrequest = async () => { try { const resp = await getInstitution(query.get("institution")) // console.log(`instit name from api: ${resp.data.name}`) // setInstitName(resp.data.name) updateResult(arr => [...arr, resp.data]) // if (details === 
"null") { // setDetails(resp.data) // } // else { // setDetails(prevArray => [...prevArray, resp.data]) // } } catch (err) { // Handle Error Here console.error(err); } } sendSearchInstitOnly().then( sendGetrequest() ) history.push({pathname:`check`, search:`institution=${query.get("institution")}`}) } else if (!query.get("institution") && !query.get("journal") && query.get("funder")){ //get funder conditions // alert(`get api funder Condition only: ${funderId}`) //condtion type is not journal only = 1 const sendSearchOrgaOnly = async () => { try { const resp = await searchorganizationonly(query.get("funder"), j_only_id) console.log(resp.data) setConditions(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } setLoading(false) } const sendGetrequest = async () => { try { const resp = await getFunder(query.get("funder")) console.log(resp.data) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } sendSearchOrgaOnly().then( sendGetrequest() ) history.push({pathname:`check`, search:`funder=${query.get("funder")}`}) } else if (!query.get("funder") && !query.get("institution") && query.get("journal")){ //get journals conditions // alert(`get api journal Condition only: ${journalId}`) //condtion type is not institution only = 2 //get journal detail const sendSearchJournalOnly = async () => { try { const resp = await searchjournalonly(query.get("journal"), o_only_id) console.log(resp.data) setConditions(arr => [...arr, resp.data]) // setConditions(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } setLoading(false) } const sendGetrequest = async () => { try { const resp = await getJournal(query.get("journal")) console.log(resp.data) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } sendSearchJournalOnly().then( sendGetrequest() ) history.push({pathname:`check`, search:`journal=${query.get("journal")}`}) } 
else if (query.get("institution") && query.get("funder") && !query.get("journal")) { //alert(`get api Filter Conditions SET--> Journal: ${journalId} VS Institution: ${institId}`) //condtion type journal/condition = 3 const sendSearchCondi = async () => { try { const resp = await searchInstitFunder(query.get("institution"), query.get("funder"), j_only_id) console.log(resp.data) setConditions(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } setLoading(false) } const sendGetInstit = async () => { try { const resp = await getInstitution(query.get("institution")) console.log(resp.data) // detailArray.push(resp.data) // setDetails(detailArray) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } const sendGetFunder = async () => { try { const resp = await getFunder(query.get("funder")) console.log(resp.data) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } sendSearchCondi().then( sendGetInstit().then(sendGetFunder()) ) history.push({pathname:`check`, search: `institution=${query.get("institution")}&funder=${query.get("funder")}`}) } else if (query.get("institution") && query.get("journal") && !query.get("funder")) { //alert(`get api Filter Conditions SET--> Journal: ${journalId} VS Institution: ${institId}`) //condtion type journal/condition = 3 const sendSearchCondi = async () => { try { const resp = await searchCondi(query.get("journal"),query.get("institution")) console.log(resp.data) setConditions(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } setLoading(false) } const sendGetInstit = async () => { try { const resp = await getInstitution(query.get("institution")) console.log(resp.data) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } const sendGetJournal = async () => { try { const resp = await getJournal(query.get("journal")) 
console.log(resp.data) // detailArray.push(resp.data) // setDetails(detailArray) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } sendSearchCondi().then( sendGetInstit().then(sendGetJournal) ) history.push({pathname:`check`, search: `institution=${query.get("institution")}&journal=${query.get("journal")}`}) } else if (!query.get("institution") && query.get("journal") && query.get("funder")) { // alert(`get api Filter Conditions SET--> Journal: ${journalId} VS Institution: ${funderId}`) //condtion type journal/institution/funder conditions = 3 const sendGetCondi = async () => { try { const resp = await searchCondi(query.get("journal"),query.get("funder")) console.log(resp.data) setConditions(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } setLoading(false) } const sendGetFunder = async () => { try { const resp = await getFunder(query.get("funder")) console.log(resp.data) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } const sendGetJournal = async () => { try { const resp = await getJournal(query.get("journal")) console.log(resp.data) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } sendGetCondi().then( sendGetFunder().then( sendGetJournal() ) ) history.push({pathname:`check`, search: `funder=${query.get("funder")}&journal=${query.get("journal")}`}) } else if (query.get("institution") && query.get("journal") && query.get("funder")) { // alert(`get api Filter Conditions SET--> Journal: ${journalId} VS Institution: ${funderId}`) //condtion type journal/institution/funder conditions = 3 console.log("main check !") //(institution + journal) const detailArray = [] const sendGetCondi = async () => { try { const resp = await searchCondi3(query.get("institution"),query.get("journal"),query.get("funder")) console.log(resp.data) setConditions(arr => [...arr, resp.data]) } catch 
(err) { // Handle Error Here console.error(err); } setLoading(false) } const sendGetInstit = async () => { try { const resp = await getInstitution(query.get("institution")) console.log(resp.data) detailArray.push(resp.data) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } const sendGetFunder = async () => { try { const resp = await getFunder(query.get("funder")) console.log(resp.data) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } const sendGetJournal = async () => { try { const resp = await getJournal(query.get("journal")) console.log(resp.data) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } //order requests sendGetCondi() sendGetInstit().then( sendGetFunder() ).then( sendGetJournal() ) history.push({pathname:`check`, search: `institution=${query.get("institution")}&funder=${query.get("funder")}&journal=${query.get("journal")}`}) } } }, []) //useEffect on Url state change React.useEffect(() => { //condition to avoid infinite loop if (history.location.pathname === "/") { setConditions([]) setDetails('null') updateResult([]) setUrl(window.location.href) } }, [url]); function handleReport () { // ## Create mail template to report a modification, contain the actual Url and the reference Term Card - window.open(`mailto:publishsupport@epfl.ch?subject= OACCT Modification request for ${encodeURIComponent(url)} &body=Request Description:`) + window.open(`mailto:publishsupport@epfl.ch?subject= OACT Modification request for ${encodeURIComponent(url)} &body=Request Description:`) } //copy url to clipboard function handlShare(e) { setOpen(true) navigator.clipboard.writeText(url) } const handleClose = () => { setOpen(false); }; function handleInstit(e, newInputValue) { if (newInputValue){ getSelectedInstitId(newInputValue) return } // if (institName){ // getSelectedInstitId(institName) // return // } setInstitId("") } 
function handleFunder(e, newInputValue) { console.log(newInputValue) if (newInputValue){ getSelectedFunderId(newInputValue) return } setFunderId("") } function handleJournal(e, newInputValue) { if (newInputValue){ getSelectedJournalId(newInputValue) return } setJournalId("") } function handleSubmit(e) { setLoading(true) e.preventDefault() //reset precedent results setConditions([]) setDetails([]) updateResult([]) if (!institId && !journalId && !funderId){ setLoading(false) setDetails('null') } if (institId && !journalId && !funderId){ //get organizations conditions // alert(`get api organization Condition only: ${institId}`) //condtion type is not journal only = 1 // Get the user const sendSearchInstitOnly = async () => { try { const resp = await searchorganizationonly(institId, j_only_id) console.log(resp.data) setConditions(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } setLoading(false) } console.log(details) const sendGetrequest = async () => { try { const resp = await getInstitution(institId) console.log(resp.data) updateResult(arr => [...arr, resp.data]) // if (details === "null") { // setDetails(resp.data) // } // else { // setDetails(prevArray => [...prevArray, resp.data]) // } } catch (err) { // Handle Error Here console.error(err); } } sendSearchInstitOnly().then( sendGetrequest() ) history.push({pathname:`check`, search:`institution=${institId}`}) } else if (!institId && !journalId && funderId){ //get funder conditions // alert(`get api funder Condition only: ${funderId}`) //condtion type is not journal only = 1 const sendSearchOrgaOnly = async () => { try { const resp = await searchorganizationonly(funderId, j_only_id) console.log(resp.data) setConditions(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } setLoading(false) } const sendGetrequest = async () => { try { const resp = await getFunder(funderId) console.log(resp.data) updateResult(arr => [...arr, resp.data]) } catch 
(err) { // Handle Error Here console.error(err); } } sendSearchOrgaOnly().then( sendGetrequest() ) history.push({pathname:`check`, search:`funder=${funderId}`}) } else if (!funderId && !institId && journalId){ //get journals conditions // alert(`get api journal Condition only: ${journalId}`) //condtion type is not institution only = 2 //get journal detail const sendSearchJournalOnly = async () => { try { const resp = await searchjournalonly(journalId, o_only_id) console.log(resp.data) setConditions(arr => [...arr, resp.data]) // setConditions(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } setLoading(false) } const sendGetrequest = async () => { try { const resp = await getJournal(journalId) console.log(resp.data) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } sendSearchJournalOnly().then( sendGetrequest() ) history.push({pathname:`check`, search:`journal=${journalId}`}) } else if (institId && funderId && !journalId) { //alert(`get api Filter Conditions SET--> Journal: ${journalId} VS Institution: ${institId}`) //condtion type journal/condition = 3 const sendSearchCondi = async () => { try { const resp = await searchInstitFunder(institId, funderId, j_only_id) console.log(resp.data) setConditions(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } setLoading(false) } const sendGetInstit = async () => { try { const resp = await getInstitution(institId) console.log(resp.data) //manage the order output // detailArray.push(resp.data) // setDetails(detailArray) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } const sendGetFunder = async () => { try { const resp = await getFunder(funderId) console.log(resp.data) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } sendSearchCondi().then( sendGetInstit().then(sendGetFunder()) ) 
history.push({pathname:`check`, search: `institution=${institId}&funder=${funderId}`}) } else if (institId && journalId && !funderId) { //alert(`get api Filter Conditions SET--> Journal: ${journalId} VS Institution: ${institId}`) //condtion type journal/condition = 3 const sendSearchCondi = async () => { try { const resp = await searchCondi(journalId,institId) console.log(resp.data) setConditions(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } setLoading(false) } const sendGetInstit = async () => { try { const resp = await getInstitution(institId) console.log(resp.data) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } const sendGetJournal = async () => { try { const resp = await getJournal(journalId) console.log(resp.data) // detailArray.push(resp.data) // setDetails(detailArray) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } sendSearchCondi().then( sendGetInstit().then(sendGetJournal) ) history.push({pathname:`check`, search: `institution=${institId}&journal=${journalId}`}) } else if (!institId && journalId && funderId) { // alert(`get api Filter Conditions SET--> Journal: ${journalId} VS Institution: ${funderId}`) //condtion type journal/institution/funder conditions = 3 const sendGetCondi = async () => { try { const resp = await searchCondi(journalId,funderId) console.log(resp.data) setConditions(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } setLoading(false) } const sendGetFunder = async () => { try { const resp = await getFunder(funderId) console.log(resp.data) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } const sendGetJournal = async () => { try { const resp = await getJournal(journalId) console.log(resp.data) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } 
sendGetCondi().then( sendGetJournal().then( sendGetFunder() ) ) history.push({pathname:`check`, search: `funder=${funderId}&journal=${journalId}`}) } else if (institId && journalId && funderId) { // alert(`get api Filter Conditions SET--> Journal: ${journalId} VS Institution: ${funderId}`) //condtion type journal/institution/funder conditions = 3 console.log("main check !") //(institution + journal) const detailArray = [] const sendGetCondi = async () => { try { const resp = await searchCondi3(institId,journalId,funderId) console.log(resp.data) setConditions(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } setLoading(false) } const sendGetInstit = async () => { try { const resp = await getInstitution(institId) console.log(resp.data) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } const sendGetFunder = async () => { try { const resp = await getFunder(funderId) console.log(resp.data) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } const sendGetJournal = async () => { try { const resp = await getJournal(journalId) console.log(resp.data) updateResult(arr => [...arr, resp.data]) } catch (err) { // Handle Error Here console.error(err); } } //order the request sendGetCondi() sendGetInstit().then( sendGetFunder() ).then( sendGetJournal() ) history.push({pathname:`check`, search: `institution=${institId}&funder=${funderId}&journal=${journalId}`}) } } console.log(`all conditions SET: ${conditions}`) console.log(details) console.log(`Selected Institution ID: ${institId} , Selected Funder: ${funderId}, Selected Journal ID: ${journalId}`) function detailsResult() { console.log(`details: ${details}`) console.log(result) if (details !== 'null') { return (
} aria-controls="panel1a-content" id="panel1a-header" >

Selected option(s)

{result?.map(i => ( ))}
) } } function conditionResults () { return (
{conditions?.map(i=> ( ))}
) } return (
option.name)} renderOption={(params) => ( {params} )} // getOptionLabel={(option) => option.name} // filterOptions={filterOptions} onInputChange={handleInstit} // inputValue={institName} renderInput={(params) => ( )} /> + + { (institList.length > 0) ? +

+ {institList.length} institutions +

+ : +

+ Loading... +

+ }
option.name)} renderOption={(params) => ( {params} )} onInputChange={handleFunder} renderInput={(params) => ( )} /> + + { (funderList.length > 0) ? +

+ {funderList.length} funders +

+ : +

+ Loading... +

+ }
option.name)} renderOption={(params) => ( {params} )} onInputChange={handleJournal} renderInput={(params) => ( )} /> + + { (journalList.length > 0) ? +

+ {journalList.length} journals +

+ : +

+ Loading... +

+ } +
+ {!loading && } {loading && }
{detailsResult()} {conditionResults()} { history.location.pathname === "/" && } {/* { history.location.pathname === "/" && */} {/* } */} {"Share your Result!"} {url}
); } SearchFilterFields.propTypes = { /** Store the selected option/field Result from API. */ details: PropTypes.object, /** Store the individual response for each request. */ result: PropTypes.object, /** Store at the same place the aggregation of all request result */ conditions: PropTypes.object, /** Manage the loading wheels inside the check button. */ loading: PropTypes.bool } diff --git a/assets/src/pages/about.css b/assets/src/pages/about.css index 3ef60b46..4c41ad7a 100644 --- a/assets/src/pages/about.css +++ b/assets/src/pages/about.css @@ -1,12 +1,17 @@ .main { margin: 3rem !important; } +.div { + margin: 5rem; + text-align: left; + } + h1 h2 { margin-bottom: 3rem; + align: center; } - li { text-align: left; } diff --git a/assets/src/pages/help.css b/assets/src/pages/help.css index 5421bcbb..853683ed 100644 --- a/assets/src/pages/help.css +++ b/assets/src/pages/help.css @@ -1,12 +1,24 @@ - - .div{ +.div { margin: 5rem; + text-align: left; } - .list { + +.list { margin-left: 3rem; margin-top: 3rem; } - .list-center { + +.list-center { text-align: center; margin-top: 3rem; - } \ No newline at end of file + } + +h1 h2 { + margin-bottom: 3rem; + align: center; +} + +img { + float: left; + margin-right: 10px; +} \ No newline at end of file diff --git a/assets/src/pages/welcome.css b/assets/src/pages/welcome.css index 5dbbd5f9..3af7b231 100644 --- a/assets/src/pages/welcome.css +++ b/assets/src/pages/welcome.css @@ -1,82 +1,82 @@ .div{ margin-block-end: 5rem; } .flex-container{ display: flex; height: 60rem; /* Or whatever */ flex-direction: column; } .flex-item { /* border-style: solid; */ margin: 1rem; padding: 2rem; - cursor: pointer; + /* cursor: pointer; */ display: block; background: whitesmoke; box-shadow: 0 2px 48px 0 rgba(0, 0, 0, 0.10); -webkit-border-radius: 20px; -moz-border-radius: 20px; border-radius: 20px; padding: 30px; text-align: center; -webkit-transition: all 0.3s ease 0s; -moz-transition: all 0.3s ease 0s; -o-transition: all 0.3s ease 0s; 
transition: all 0.3s ease 0s; position: relative; margin-bottom: 30px; } /* .flex-item:hover { background-color: #3771C8 ; } */ h2{ font-family: 'Quicksand', sans-serif; } p { text-align: left; font-family: 'Quicksand', sans-serif; } /* IPAD Portrait */ @media only screen and (min-device-width: 768px) and (max-device-width: 1024px) and (orientation: portrait) and (-webkit-min-device-pixel-ratio: 1) { .flex-container{ padding-top: 3rem; margin: 1rem; display: flex; /* or inline-flex */ height: 40rem; /* Or whatever */ width: 42rem; flex-direction: column; align-items: stretch; justify-content: space-around; } .flex-item { margin: 1rem; padding: 2rem; } } /* Desktop */ @media only screen and (min-width: 1024px) { .flex-container{ height: 25rem; /* Or whatever */ display: flex; /* or inline-flex */ flex-direction: row; align-items: stretch; } .flex-item { width: 45rem; margin: 2rem; padding: 2rem; } } diff --git a/assets/src/pages/welcome.js b/assets/src/pages/welcome.js index 109174bd..78eb8fe0 100644 --- a/assets/src/pages/welcome.js +++ b/assets/src/pages/welcome.js @@ -1,37 +1,54 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. 
ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. + +*/ + import React from "react" import "./welcome.css" import Button from '@material-ui/core/Button'; export default function Welcome () { return (

Welcome!

-

The OACCT (Open Access Compliance Check Tool) is an online resource, tailored to the Swiss academic community's needs, that gathers the most important information concerning Open-Access publishing.

+

The OACT (Open Access Check Tool) is an online resource, tailored to the Swiss academic community's needs, that gathers the most important information concerning Open-Access publishing.

Mission

Its principal goal is to guide Swiss researchers in deciding where and how to publish their works in compliance with funders’ and institutional Open Access policies

Where do our data come from?

-

OACCT provides a list of journals with information aggregated from several sources on a regular basis: +

OACT provides a list of journals with information aggregated from several sources on a regular basis:

  • Journal ISSNs (source: ISSN International centre)
  • Publication conditions (source: Sherpa/Romeo)
  • Swiss institutions from swissuniversities

) } diff --git a/assets/src/reportWebVitals.js b/assets/src/reportWebVitals.js index 5253d3ad..3e1b38f4 100644 --- a/assets/src/reportWebVitals.js +++ b/assets/src/reportWebVitals.js @@ -1,13 +1,30 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. 
+ +*/ + const reportWebVitals = onPerfEntry => { if (onPerfEntry && onPerfEntry instanceof Function) { import('web-vitals').then(({ getCLS, getFID, getFCP, getLCP, getTTFB }) => { getCLS(onPerfEntry); getFID(onPerfEntry); getFCP(onPerfEntry); getLCP(onPerfEntry); getTTFB(onPerfEntry); }); } }; export default reportWebVitals; diff --git a/assets/src/services/Api.js b/assets/src/services/Api.js index b0e7d193..bf2fbde3 100644 --- a/assets/src/services/Api.js +++ b/assets/src/services/Api.js @@ -1,19 +1,36 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. + +*/ + import axios from 'axios' const Api = axios.create({ baseURL: `/api/`, }) export default Api //How to manage the different adresses dev, prod ? //docker-compose up url http://0.0.0.0:8000/api/ //local: http://127.0.0.1:8000/api/ //Dev: https://oacct-dev.epfl.ch/api/ //Test: https://oacct-test.epfl.ch/api/ //Dev: https://oacct-dev.epfl.ch/api/ diff --git a/assets/src/services/requests/Condition.js b/assets/src/services/requests/Condition.js index b2ff9d39..06b65857 100644 --- a/assets/src/services/requests/Condition.js +++ b/assets/src/services/requests/Condition.js @@ -1,56 +1,73 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. 
+ +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. + +*/ + import Api from '../Api' let today = new Date().toISOString().slice(0, 10) var date_filter = 'ge(journalcondition.valid_until,' + today + '),le(journalcondition.valid_from,' + today + '),ge(organizationcondition.valid_until,' + today + '),le(organizationcondition.valid_from,' + today + ')' // To stop filtering by validity dates, replace by a trivial filter such as // var date_filter = 'ge(condition.id,0)' export const getCondition = (id) => { return Api.request({ url: `/conditionterm/${id}`, method: 'GET', }) } export const getListOfCondition = () => { return Api.request({ url: `/conditionterm/`, method: 'GET', }) } export const searchCondi = (journalId,institId) => { return Api.request({ url: `/conditionset_light/?and(eq(journalcondition.journal.id,${journalId}),eq(organizationcondition.organization.id,${institId}),${date_filter})`, method: 'GET', }) } export const searchInstitFunder = (institId,funderId,condi) => { return Api.request({ url: `/conditionset_light/?(eq(organizationcondition.organization.id,${institId})|eq(organizationcondition.organization.id,${funderId})),ne(condition_type.id,${condi}),and(${date_filter})`, method: 'GET', }) } export const searchCondi3 = (institId,journalId,funderId) => { return Api.request({ url: 
`/conditionset_light/?(eq(organizationcondition.organization.id,${institId})|eq(organizationcondition.organization.id,${funderId})),eq(journalcondition.journal.id,${journalId}),and(${date_filter})`, method: 'GET', }) } export const searchorganizationonly = (id,condi) => { return Api.request({ url: `/conditionset_light/?and(eq(organizationcondition.organization.id,${id}),ne(condition_type.id,${condi}),${date_filter})`, method: 'GET', }) } export const searchjournalonly = (id,condi) => { return Api.request({ url: `/conditionset_light/?and(eq(journalcondition.journal.id,${id}),ne(condition_type.id,${condi}),${date_filter})`, method: 'GET', }) } \ No newline at end of file diff --git a/assets/src/services/requests/Funder.js b/assets/src/services/requests/Funder.js index 4b1ceaae..260bd8a2 100644 --- a/assets/src/services/requests/Funder.js +++ b/assets/src/services/requests/Funder.js @@ -1,21 +1,38 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. 
+ +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. + +*/ + import Api from '../Api' export const getFunder = (id) => { return Api.request({ url: `/funder/${id}`, method: 'GET', }) } export const getListOfFunder = () => { return Api.request({ url: `/funder/`, method: 'GET', }) } export const searchFunderCondi = (id,condi) => { return Api.request({ url: `/organizationcondition/?and(eq(organization.id,${id}),ne(condition_set.condition_type.id,${condi}))`, method: 'GET', }) } \ No newline at end of file diff --git a/assets/src/services/requests/Institution.js b/assets/src/services/requests/Institution.js index ec2275e3..845c1ec6 100644 --- a/assets/src/services/requests/Institution.js +++ b/assets/src/services/requests/Institution.js @@ -1,35 +1,52 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. 
The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. + +*/ + import Api from '../Api' export const getInstitution = (id) => { return Api.request({ url: `/organization/${id}`, method: 'GET', }) } export const getListOfInstitution = () => { return Api.request({ url: `/organization/`, method: 'GET', }) } export const searchListOfInstitutionCondi = (id,condi) => { return Api.request({ url: `/organizationcondition/?and(eq(organization.id,${id}),ne(condition_set.condition_type.id,${condi}))`, method: 'GET', }) } export const getListOfCondiInstitution = () => { return Api.request({ url: `/organizationcondition/`, method: 'GET', }) } export const getInstitutionCondi = (id) => { return Api.request({ url: `/organizationcondition/${id}`, method: 'GET', }) } \ No newline at end of file diff --git a/assets/src/services/requests/Journal.js b/assets/src/services/requests/Journal.js index c5821772..a618e138 100644 --- a/assets/src/services/requests/Journal.js +++ b/assets/src/services/requests/Journal.js @@ -1,24 +1,41 @@ +/* +This is the Open Access 
Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +<https://www.gnu.org/licenses/>.
+ +*/ + import Api from '../Api' export const getJournal = (id) => { return Api.request({ url: `/journal/${id}`, method: 'GET', }) } export const searchListOfJournalCondi = (id,condi) => { return Api.request({ url: `/journalcondition/?and(eq(journal.id,${id}),ne(condition_set.condition_type.id,${condi}))`, method: 'GET', }) } export const getListOfJournal = () => { return Api.request({ url: `/journal_light/`, method: 'GET', }) } \ No newline at end of file diff --git a/assets/src/services/requests/requests.md b/assets/src/services/requests/requests.md index 55406f0b..7a099a64 100644 --- a/assets/src/services/requests/requests.md +++ b/assets/src/services/requests/requests.md @@ -1,24 +1,24 @@ ## Requests React location `src/services/requests` ## Rql RQL (Resource query language) is designed for modern application development. It is built for the web, ready for NoSQL, and highly extensible with simple syntax. This is a query language fast and convenient database interaction. RQL was designed for use in URLs to request object-style data structures. source: [django-rql](https://django-rql.readthedocs.io/) -# Why Rql for OACCT? +# Why Rql for OACT? -The OACCT's data structure has a complicated design to allow different data management use cases (add, update, delete)via API/Backend Admin/frontend. +The OACCT's data structure is designed to allow different data management use cases (add, update, delete) via API/Backend Admin/frontend. A flexible API language such as Rql can fully support these use cases. Rql allow us to do different requests with filters included inside the Url: Exemple inside Condition.js: `/conditionset/?and(eq(journalcondition.journal.id,${id}),ne(condition_type.id,${condi}),${date_filter})` Rql language is fully integrated into Django Rest Framework. It allow us to test the request manually directly to the url adress without changing the models or views. 
Exemple on dev Url: `https://oacct-dev.epfl.ch/api/conditionset/?and(eq(journalcondition.journal.id,3),eq(organizationcondition.organization.id,11),eq(condition_type.id,1))` diff --git a/assets/src/setupTests.js b/assets/src/setupTests.js index 8f2609b7..ad2ab820 100644 --- a/assets/src/setupTests.js +++ b/assets/src/setupTests.js @@ -1,5 +1,22 @@ +/* +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +<https://www.gnu.org/licenses/>.
+ +*/ + // jest-dom adds custom jest matchers for asserting on DOM nodes. // allows you to do things like: // expect(element).toHaveTextContent(/react/i) // learn more: https://github.com/testing-library/jest-dom import '@testing-library/jest-dom'; diff --git a/conf/nginx-app.conf b/conf/nginx-app.conf index b9b4aa77..b5b0af42 100644 --- a/conf/nginx-app.conf +++ b/conf/nginx-app.conf @@ -1,184 +1,186 @@ # nginx-app.conf # Enable CORS for selected origins # map instead of many if's map $http_origin $cors { default "null"; "https://www.test-cors.org" $http_origin; "https://www.epfl.ch" $http_origin; "http://127.0.0.1" $http_origin; "https://localhost" $http_origin; } # the upstream component nginx needs to connect to upstream django { server unix:/oacct_checker/app.sock; # for a file socket # server 127.0.0.1:8001; # for a web port socket (we'll use this first) } # We want to see the original IP of HTTP requests, not the one from the Openshift gateway -set_real_ip_from 172.31.0.0/16; -set_real_ip_from 10.180.21.0/24; -set_real_ip_from 127.0.0.1/8; +#set_real_ip_from 172.31.0.0/16; +#set_real_ip_from 10.180.21.0/24; +#set_real_ip_from 127.0.0.1/8; # log format as per C2C recommandation 2022-03-28 -log_format main '$remote_addr - $remote_user [$time_local] "$request" ' '$status $body_bytes_sent "$http_referer" ' '"$http_user_agent" "$http_x_forwarded_for"'; +log_format main '$remote_addr - $remote_user [$time_local] "$request" ' '$status $body_bytes_sent "$http_referer" ' '"$http_user_agent" -- "$http_x_forwarded_for" -- "$http_x_real_ip"'; +access_log /dev/stdout main; +error_log /dev/stderr info; + server { listen 8080 default_server; #listen [::]:80 ; server_name 127.0.0.1; ## Redirige le HTTP vers le HTTPS ## #return 301 https://$server_name$request_uri; # default max body size of 1M not sufficient for 1000 journals client_max_body_size 100M; add_header "Content-Security-Policy" "default-src 'self' https://web2018.epfl.ch https://cdn.datatables.net"; add_header 
"Strict-Transport-Security" "max-age=31536000"; - # Django media; not needed in this project location /media { alias /oacct_checker; # your Django project's media files - amend as required } location /static { alias /oacct_checker/staticfiles; # your Django project's static files - amend as required # Simple requests if ($request_method ~* "(GET|POST)") { add_header "Access-Control-Allow-Origin" "$cors"; } # Preflighted requests if ($request_method = OPTIONS ) { add_header "Access-Control-Allow-Origin" "$cors"; add_header "Access-Control-Allow-Methods" "GET, POST, OPTIONS, HEAD"; add_header "Access-Control-Allow-Headers" "Authorization, Origin, X-Requested-With, Content-Type, Accept"; return 200; } } location /sphinx { alias /oacct_checker/sphinx/_build/html; # Sphinx documentation served separately # Simple requests (standard, probably overkill) if ($request_method ~* "(GET|POST)") { add_header "Access-Control-Allow-Origin" "$cors"; } # Preflighted requests if ($request_method = OPTIONS ) { add_header "Access-Control-Allow-Origin" "$cors"; add_header "Access-Control-Allow-Methods" "GET, POST, OPTIONS, HEAD"; add_header "Access-Control-Allow-Headers" "Authorization, Origin, X-Requested-With, Content-Type, Accept"; return 200; } } location /styleguide { alias /oacct_checker/reactDoc/styleguide; # Sphinx documentation served separately # Simple requests if ($request_method ~* "(GET|POST)") { add_header "Access-Control-Allow-Origin" "$cors"; } # Preflighted requests if ($request_method = OPTIONS ) { add_header "Access-Control-Allow-Origin" "$cors"; add_header "Access-Control-Allow-Methods" "GET, POST, OPTIONS, HEAD"; add_header "Access-Control-Allow-Headers" "Authorization, Origin, X-Requested-With, Content-Type, Accept"; return 200; } } # Finally, send all non-media requests to the Django server. 
location / { uwsgi_pass django; include /oacct_checker/conf/uwsgi_params; # the uwsgi_params file you installed # Simple requests if ($request_method ~* "(GET|POST)") { add_header "Access-Control-Allow-Origin" "$cors"; } # Preflighted requests if ($request_method = OPTIONS ) { add_header "Access-Control-Allow-Origin" "$cors"; add_header "Access-Control-Allow-Methods" "GET, POST, OPTIONS, HEAD"; add_header "Access-Control-Allow-Headers" "Authorization, Origin, X-Requested-With, Content-Type, Accept"; return 200; } } } # configuration of the server server { # the port your site will be served on, default_server indicates that this server block # is the block to use if no blocks match the server_name # SSL configuration listen 4443 ssl http2 default_server; listen [::]:4443 ssl http2 ; include snippets/self-signed.conf; include snippets/ssl-params.conf; # the domain name it will serve for server_name 127.0.0.1; # substitute your machine's IP address or FQDN charset utf-8; # max upload size client_max_body_size 75M; # adjust to taste add_header "Content-Security-Policy" "default-src 'self' https://web2018.epfl.ch https://cdn.datatables.net"; add_header "Strict-Transport-Security" "max-age=31536000"; # Django media location /media { alias /oacct_checker; # your Django project's media files - amend as required } location /static { alias /oacct_checker/staticfiles; # your Django project's static files - amend as required # Simple requests if ($request_method ~* "(GET|POST)") { add_header "Access-Control-Allow-Origin" "$cors"; } # Preflighted requests if ($request_method = OPTIONS ) { add_header "Access-Control-Allow-Origin" "$cors"; add_header "Access-Control-Allow-Methods" "GET, POST, OPTIONS, HEAD"; add_header "Access-Control-Allow-Headers" "Authorization, Origin, X-Requested-With, Content-Type, Accept"; return 200; } } # Finally, send all non-media requests to the Django server. 
location / { uwsgi_pass django; include /oacct_checker/conf/uwsgi_params; # the uwsgi_params file you installed # default timout of 60s too short for significant JSON uploads? uwsgi_read_timeout 300s; uwsgi_send_timeout 300s; # Simple requests if ($request_method ~* "(GET|POST)") { add_header "Access-Control-Allow-Origin" "$cors"; } # Preflighted requests if ($request_method = OPTIONS ) { add_header "Access-Control-Allow-Origin" "$cors"; add_header "Access-Control-Allow-Methods" "GET, POST, OPTIONS, HEAD"; add_header "Access-Control-Allow-Headers" "Authorization, Origin, X-Requested-With, Content-Type, Accept"; return 200; } } } diff --git a/conf/supervisor-app.conf b/conf/supervisor-app.conf index 44c5bd95..ff989ff2 100644 --- a/conf/supervisor-app.conf +++ b/conf/supervisor-app.conf @@ -1,10 +1,13 @@ [program:app-uwsgi] command = /usr/local/bin/uwsgi --ini /oacct_checker/uwsgi.ini stdout_logfile=/dev/stdout stdout_logfile_maxbytes=0 stderr_logfile=/dev/stdout stderr_logfile_maxbytes=0 [program:nginx-app] command = /usr/sbin/nginx - +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stdout +stderr_logfile_maxbytes=0 diff --git a/conf/uwsgi_params b/conf/uwsgi_params index 52c6a4da..2c16a192 100644 --- a/conf/uwsgi_params +++ b/conf/uwsgi_params @@ -1,18 +1,18 @@ uwsgi_param QUERY_STRING $query_string; uwsgi_param REQUEST_METHOD $request_method; uwsgi_param CONTENT_TYPE $content_type; uwsgi_param CONTENT_LENGTH $content_length; uwsgi_param REQUEST_URI $request_uri; uwsgi_param PATH_INFO $document_uri; uwsgi_param DOCUMENT_ROOT $document_root; uwsgi_param SERVER_PROTOCOL $server_protocol; uwsgi_param HTTPS $https if_not_empty; -uwsgi_param X-Real-IP $remote_addr; +uwsgi_param X-Real-IP $http_x_forwarded_for; uwsgi_param REMOTE_ADDR $remote_addr; uwsgi_param REMOTE_PORT $remote_port; uwsgi_param SERVER_PORT $server_port; uwsgi_param SERVER_NAME $server_name; diff --git a/django_api/__init__.py b/django_api/__init__.py index e69de29b..dfa285ae 
100644 --- a/django_api/__init__.py +++ b/django_api/__init__.py @@ -0,0 +1,17 @@ +""" +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. + +""" + diff --git a/django_api/admin.py b/django_api/admin.py index 96cc95cb..fb561423 100644 --- a/django_api/admin.py +++ b/django_api/admin.py @@ -1,506 +1,621 @@ +""" +This is the Open Access Check Tool (OACT). 
+The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +<https://www.gnu.org/licenses/>. + +""" + """ Django admin module for the django_api application All admin pages inherit from import_export.admin.ImportExportModelAdmin for JSON import/export.
""" from django.contrib import admin from django import forms from datetime import date, datetime import re from import_export.admin import ImportExportModelAdmin from django.contrib.admin import TabularInline from django.contrib.admin import SimpleListFilter from django.contrib.admin import RelatedOnlyFieldListFilter +from django.db.models import Prefetch from django.forms.models import BaseInlineFormSet from django.utils.translation import gettext_lazy as _ #from inline_actions.admin import InlineActionsMixin #from inline_actions.admin import InlineActionsModelAdminMixin from django.shortcuts import render from django.http import HttpResponseRedirect from django.urls import reverse from django.utils.html import escape, mark_safe, format_html from .models import Country from .models import Language from .models import Issn from .models import Oa from .models import Publisher from .models import Journal from .models import Organization from .models import Version from .models import Licence from .models import Cost_factor_type from .models import Cost_factor from .models import Term from .models import ConditionType from .models import ConditionSubType from .models import ConditionSet from .models import OrganizationCondition from .models import JournalCondition # Register your models here. 
@admin.register(Issn) class IssnAdmin(ImportExportModelAdmin): # TODO use RelatedOnlyFieldListFilter for publisher when data allows it list_filter = ('issn_type', 'journal__publisher__name', ) list_display = ("id", "issn", 'journal') class IssnInline(admin.TabularInline): model = Issn readonly_fields = ('issn', 'issn_type',) # This Inline is stricty read-only for the moment def has_change_permission(self, request, obj=None): return False def has_add_permission(self, request, obj=None): return False def has_delete_permission(self, request, obj=None): return False @admin.register(Journal) class JournalAdmin(ImportExportModelAdmin): list_display = ("id", "name", "get_journal_issns",) # TODO use RelatedOnlyFieldListFilter for publisher when data allows it list_filter = ('oa_status', 'publisher', ) filter_horizontal = ('publisher', 'language', ) search_fields = ('name', 'classIssn__issn') inlines = (IssnInline, ) @admin.display(description='ISSNs') def get_journal_issns(self, obj): return list(Issn.objects.filter(journal=obj)) @admin.register(Language) class LanguageAdmin(ImportExportModelAdmin): pass @admin.register(Organization) class OrganizationAdmin(ImportExportModelAdmin): list_display = ("id", "name") list_filter = ('is_funder', ('country', RelatedOnlyFieldListFilter)) filter_horizontal = ('country', ) search_fields = ('name', ) @admin.register(Version) class VersionAdmin(ImportExportModelAdmin): pass @admin.register(Country) class CountryAdmin(ImportExportModelAdmin): pass @admin.register(Oa) class OaAdmin(ImportExportModelAdmin): pass @admin.register(Publisher) class PublisherAdmin(ImportExportModelAdmin): list_display = ("id", "name") # Experimental: what will happen with 200+ countries in the database? 
list_filter = (('country', RelatedOnlyFieldListFilter), ) filter_horizontal = ('country', ) @admin.register(Term) class TermAdmin(ImportExportModelAdmin): list_display = ("id", "__str__", ) list_filter = ('version', 'licence', 'ir_archiving') search_fields = ("id", "comment", "embargo_months") filter_horizontal = ('version', 'cost_factor', 'licence', ) # textarea input is better for comments def get_form(self, request, obj=None, **kwargs): kwargs['widgets'] = {'comment': forms.Textarea} return super().get_form(request, obj, **kwargs) @admin.register(ConditionType) class ConditionTypeAdmin(ImportExportModelAdmin): list_display = ("id", "condition_issuer") @admin.register(ConditionSubType) class ConditionTypeAdmin(ImportExportModelAdmin): list_display = ("id", "label") class JournalConditionFormset(forms.BaseInlineFormSet): def __init__(self, *args, **kwargs): super(JournalConditionFormset, self).__init__(*args, **kwargs) - self.queryset = self.queryset.select_related("journal", 'condition_set') + self.queryset = self.queryset.select_related('journal', 'condition_set', 'condition_set__condition_type') + #self.queryset = self.queryset.prefetch_related('journal') + #print('JournalConditionFormset.queryset length ', len(self.queryset)) + # print(self.queryset.prefetch_related('journal').__dict__) + + +class JournalConditionInlineForm(forms.ModelForm): + class Meta: + model = JournalCondition + exclude = () + + def __init__(self, *args, **kwargs): + super(JournalConditionInlineForm, self).__init__(*args, **kwargs) + #print('JournalConditionInlineForm created') + #self.fields['journal'].queryset = JournalCondition.objects.select_related('journal').all() + #class JournalConditionInline(InlineActionsMixin, TabularInline): class JournalConditionInline(TabularInline): model = JournalCondition fields = ('journal', 'valid_from', 'valid_until') + # ordering = ('journal__name', 'valid_from', 'valid_until') extra = 1 #inline_actions = ['connect_all_journals'] autocomplete_fields = 
('journal', ) - formset = JournalConditionFormset + fk_name = 'condition_set' + formset = JournalConditionFormset + #form = JournalConditionInlineForm + + # IN PROGRESS 2022-04-20 we'll get back to it later + #form = JournalConditionInlineForm + + def get_queryset(self, *args, **kwargs): + qs = super().get_queryset(*args, **kwargs).select_related('journal') + print('JournalConditionInline.get_queryset() called') + print(qs.__dict__) + print(qs.all()) + return qs + + def dummy_get_formset(self, request, obj=None, **kwargs): + formset = super(JournalConditionInline, self).get_formset(request, obj, **kwargs) + queryset = formset.form.base_fields["journal"].queryset + formset.form.base_fields["journal"].queryset = queryset + return formset + + # not working AB 2022-04-27 + """ + def formfield_for_foreignkey(self, db_field, request, **kwargs): + if 'queryset' in kwargs: + kwargs['queryset'] = kwargs['queryset'].select_related() + else: + db = kwargs.pop('using', None) + kwargs['queryset'] = db_field.remote_field.to._default_manager.using(db).complex_filter(db_field.remote_field.limit_choices_to).select_related() + return super(JournalConditionInline, self).formfield_for_foreignkey(db_field, request, **kwargs) + """ """ def connect_all_journals(self, request, obj, parent_obj=None): # Do stuff here, then return None to go to current view return None connect_all_journals.short_description = ("Connect Condition Set with some or all Journals") """ - def get_queryset(self, request): - qs = super(JournalConditionInline, self).get_queryset(request).prefetch_related() - return qs.select_related('journal') + #def get_queryset(self, request): + # qs = super(JournalConditionInline, self).get_queryset(request).prefetch_related('journal', 'condition_set') + # return qs.select_related('journal', 'condition_set', 'condition_set__condition_type') + +class SimpleJournalConditionInline(TabularInline): + model = JournalCondition + fields = ('journal', 'valid_from', 'valid_until') + # 
ordering = ('journal__name', 'valid_from', 'valid_until') + extra = 1 + #autocomplete_fields = ('journal', ) + fk_name = 'condition_set' + + def get_queryset(self, *args, **kwargs): + qs = super().get_queryset(*args, **kwargs).select_related('journal') + #print('JournalConditionInline.get_queryset() called') + #print(qs.__dict__) + #print(qs.all()) + return qs + class OrganizationConditionFormset(forms.BaseInlineFormSet): def __init__(self, *args, **kwargs): super(OrganizationConditionFormset, self).__init__(*args, **kwargs) self.queryset = self.queryset.select_related("organization", 'condition_set') class OrganizationConditionInline(TabularInline): #class OrganizationConditionInline(InlineActionsMixin, TabularInline): # model = OrganizationCondition model = ConditionSet.organization.through extra = 1 autocomplete_fields = ('organization', ) - formset = OrganizationConditionFormset + formset = OrganizationConditionFormset + + def get_queryset(self, request): + qs = super(OrganizationConditionInline, self).get_queryset(request).prefetch_related('organization', 'condition_set') + return qs.select_related('organization', 'condition_set', 'condition_set__condition_type') @admin.action(description='Apply selected condition sets to multiple Journals') def connect_with_all_journals(modeladmin, request, queryset): """ Action applicable to one or more ConditionSets: connect with a list of Journals by entering the ISSN for the relevant ones, or all Journals if no ISSN is given. Start and end dates must be provided during the action. This action is useful to connect a new organization policy or publishing agreement with the journals to which it applies. 
""" if request.POST.get('apply'): try: valid_from = date.fromisoformat(request.POST['valid_from']) valid_until = date.fromisoformat(request.POST['valid_until']) issn_list = set([x for x in re.split(' |,|;|\n|\r|\t', request.POST['issn_list']) if len(x) > 0]) print(issn_list) if valid_from > valid_until: raise ValueError # print((valid_from, valid_until)) if len(issn_list) == 0: all_journals = Journal.objects.all() else: journal_ids = list(Issn.objects.filter(issn__in=issn_list).values_list('journal', flat=True).distinct()) # print(journal_ids) all_journals = Journal.objects.filter(id__in=journal_ids) print(all_journals) print(len(issn_list), len(all_journals)) # all_journals =[] # The following block could certainly be optimized! AB 2021-08-12 for condition_set in queryset: # print('-----------------') # print(condition_set) for j in all_journals: # print(j) # search for existing connections existing_connections = JournalCondition.objects.filter(journal=j, condition_set=condition_set, valid_from__lt=date.today(), valid_until__gt=date.today()) # print(existing_connections) if len(existing_connections) == 0: new_journal_condition = JournalCondition(journal=j, condition_set=condition_set, valid_from=valid_from, valid_until=valid_until) new_journal_condition.save() else: # This should not happen, or could it? print(f'{j} already connected with {condition_set}') return None except ValueError: pass return render(request, 'admin/get_validity_dates.html', context={'queryset': queryset, 'objects': 'journals'}) +@admin.action(description='Unlink selected condition sets from all Journals') +def disconnect_from_all_journals(modeladmin, request, queryset): + """ Action applicable to one or more ConditionSets: + disconnect from all journals + This action is useful in cases where it is easier to unlink the ConditionSet, + modify it and re-link later than to work through the ConditionSet admin page + (for example for org. 
policies applicable by default to all possible journals) + """ + warning = 'Are you sure you want to unlink all journals? Only the relationship is affected, ' + warning += 'no journal data will be destroyed - but hey, think about it first.' + if request.POST.get('apply'): + try: + for condition_set in queryset: + # print('-----------------') + # print(condition_set) + condition_set.journal.clear() + condition_set.save() + return None + except ValueError: + pass + return render(request, 'admin/are_you_sure.html', context={'queryset': queryset, 'text': warning, 'function': 'disconnect_from_all_journals'}) + + @admin.action(description='Apply selected condition sets to all Organizations') def connect_with_all_organizations(modeladmin, request, queryset): """ Action applicable to one or more ConditionSets: connect with all Organizations. Start and end dates must be provided during the action. This action is useful to connect a new journal policy with all known organizations. """ if request.POST.get('apply'): try: valid_from = date.fromisoformat(request.POST['valid_from']) valid_until = date.fromisoformat(request.POST['valid_until']) if valid_from > valid_until: raise ValueError # print((valid_from, valid_until)) all_orgs = Organization.objects.all() for condition_set in queryset: # print('-----------------') # print(condition_set) for o in all_orgs: # print(o) # search for existing connections existing_connections = OrganizationCondition.objects.filter(organization=o, condition_set=condition_set, valid_from__lt=date.today(), valid_until__gt=date.today()) # print(existing_connections) if len(existing_connections) == 0: new_organization_condition = OrganizationCondition(organization=o, condition_set=condition_set, valid_from=valid_from, valid_until=valid_until) new_organization_condition.save() return None except ValueError: pass return render(request, 'admin/get_validity_dates.html', context={'queryset': queryset, 'objects': 'organizations'}) 
@admin.action(description='Set valid_until date') def end_validity(modeladmin, request, queryset): """ Action to set the end date for selected Journal-Condition relationships. This action was introduced to add validity dates to batch-uploaded JournalConditions that lacked this information. """ if request.POST.get('apply'): try: valid_until = date.fromisoformat(request.POST['date']) queryset.update(valid_until=valid_until) return None except ValueError: pass return render(request, 'admin/get_single_validity_date.html', context={'queryset': queryset.prefetch_related(), 'limit': 'end', 'objects': 'selected journal-condition connections'}) @admin.action(description='Set valid_from date') def start_validity(modeladmin, request, queryset): """ Action to set the start date for selected Journal-Condition relationships. This action was introduced to add validity dates to batch-uploaded JournalConditions that lacked this information. """ if request.POST.get('apply'): try: valid_from = date.fromisoformat(request.POST['date']) queryset.update(valid_from=valid_from) return None except ValueError: pass return render(request, 'admin/get_single_validity_date.html', context={'queryset': queryset, 'limit': 'start', 'objects': 'selected journal-condition connections'}) class ConditionSetAdminForm(forms.ModelForm): class Meta: model = ConditionSet - fields = ['condition_type', 'subtype', 'term', 'source', 'comment', ] + fields = ['condition_type', 'subtype', 'term', 'source', 'comment', 'organization', 'journal'] def __init__(self, *args, **kwargs): #start = datetime.now() super(ConditionSetAdminForm, self).__init__(*args, **kwargs) + #print(self.__dict__) self.fields['term'].queryset = Term.objects.all().prefetch_related('licence', 'cost_factor', 'version') + #print('ConditionSetAdminForm.__init__(): ', datetime.now(), datetime.now()-start) @admin.register(ConditionSet) class ConditionSetAdmin(ImportExportModelAdmin): # class ConditionSetAdmin(InlineActionsModelAdminMixin, 
ImportExportModelAdmin): list_display = ("id", "condition_type", "comment") search_fields = ['organization__name', 'journal__name', 'comment', 'id', 'condition_type__condition_issuer'] list_filter = ('condition_type', 'journal__publisher__name', 'organization__name', ) form = ConditionSetAdminForm filter_horizontal = ('term', ) - exclude = ('organization', 'journal', ) + #exclude = ('organization', 'journal', ) inlines = (OrganizationConditionInline, JournalConditionInline, ) - # inlines = (OrganizationConditionInline, ) - actions = [connect_with_all_journals, connect_with_all_organizations] + #inlines = (OrganizationConditionInline, ) + actions = [connect_with_all_journals, connect_with_all_organizations, disconnect_from_all_journals] # textarea input is better for comments def get_form(self, request, obj=None, **kwargs): kwargs['widgets'] = {'comment': forms.Textarea} return super().get_form(request, obj, **kwargs) def get_queryset(self, request): #start = datetime.now() - test_model_qs = super(ConditionSetAdmin, self).get_queryset(request) - test_model_qs = test_model_qs.prefetch_related('organization', 'journal') + test_model_qs = super(ConditionSetAdmin, self).get_queryset(request).select_related('condition_type') + # test_model_qs = test_model_qs.prefetch_related('organizationcondition_set', 'journalcondition_set') + + # This could be the cause of 502 errors (out-of-memory killing) + #test_model_qs = test_model_qs.prefetch_related(Prefetch('journalcondition_set', + # queryset=JournalCondition.objects.select_related('condition_set', 'journal'))) + #test_model_qs = test_model_qs.prefetch_related('journal') + #print(test_model_qs.__dict__) + #print('ConditionSetAdmin.get_queryset(): ', datetime.now(), datetime.now()-start) return test_model_qs class XConditionValidListFilter(SimpleListFilter): """ Human-readable title which will be displayed in the right admin sidebar just above the filter options. 
""" title = _('currently valid') # Parameter for the filter that will be used in the URL query. parameter_name = 'valid' def lookups(self, request, model_admin): """ Returns a list of tuples. The first element in each tuple is the coded value for the option that will appear in the URL query. The second element is the human-readable name for the option that will appear in the right sidebar. """ return ( ('true', _('True')), ('false', _('False')), ) def queryset(self, request, queryset): """ Returns the filtered queryset based on the value provided in the query string and retrievable via `self.value()`. """ # Compare the requested value (either '80s' or '90s') # to decide how to filter the queryset. if self.value() == 'true': return queryset.filter(valid_from__lte=date.today(), valid_until__gte=date.today()) if self.value() == 'false': return queryset.exclude(valid_from__lte=date.today(), valid_until__gte=date.today()) class ConditionSetListDynamicFilter(SimpleListFilter): """ Dynamic filter-by-publisher for ConditionSets. Only Publishers of Journals connected to the currently displayed ConditionSets are proposed. 
""" title = _('condition sets (publisher-dependant)') parameter_name = 'condition_set' def lookups(self, request, model_admin): if 'journal__publisher__name' in request.GET: # A publisher name filter is in effect journal_publisher_name = request.GET['journal__publisher__name'] print([journal_publisher_name]) #cs_by_publisher = model_admin.model.objects.filter(journal__publisher__name=journal_publisher_name) #print(cs_by_publisher) #jcs_by_publisher = model_admin.model.objects.all().filter(journal__publisher__name=journal_publisher_name).prefetch_related() #condition_sets = set([c.condition_set for c in model_admin.model.objects.all().filter(journal__publisher__name=journal_publisher_name)]) #condition_sets = sorted(list(condition_sets)) cs = model_admin.model.objects.filter(journal__publisher__name=journal_publisher_name).values_list('condition_set') condition_sets = ConditionSet.objects.filter(id__in=cs).order_by('id') print(condition_sets) #condition_sets = ConditionSet.objects.filter(journal__publisher__name=journal_publisher_name).order_by('id') #condition_sets = model_admin.model.objects.filter(journal__publisher__name=journal_publisher_name). 
else: #condition_sets = set([c.condition_set for c in model_admin.model.objects.all()]) condition_sets = ConditionSet.objects.all().order_by('id') return [(s.id, str(s)) for s in condition_sets] def queryset(self, request, queryset): if self.value(): return queryset.filter(condition_set__id__exact=self.value()) @admin.register(OrganizationCondition) class OrganizationConditionAdmin(ImportExportModelAdmin): """ Organization-ConditionSet connection admin page """ @admin.display(description='Condition Set') def link_to_conditionset(self, obj): """ Calculated field for the list display: link to the relevant ConditionSet """ link = reverse("admin:django_api_conditionset_change", args=[obj.condition_set.id]) return format_html(f'{obj.condition_set}') search_fields = ("id", "organization__name", "condition_set__id") list_display = ("id", "organization_name", "link_to_conditionset", "valid_from", "valid_until") list_select_related = ('condition_set', ) list_filter = ('condition_set__condition_type', XConditionValidListFilter, # This has become very slow on 2021-12-09, will revisit later #('condition_set', RelatedOnlyFieldListFilter), 'condition_set__id') def organization_name(self, obj): return obj.organization.name def get_queryset(self, request): qs = super(OrganizationConditionAdmin, self).get_queryset(request).prefetch_related() return qs.select_related('condition_set', 'organization', 'condition_set__condition_type', ) @admin.register(Licence) class LicenceAdmin(ImportExportModelAdmin): pass @admin.register(JournalCondition) class JournalConditionAdmin(ImportExportModelAdmin): @admin.display(description='Condition Set') def link_to_conditionset(self, obj): link = reverse("admin:django_api_conditionset_change", args=[obj.condition_set.id]) return format_html(f'{obj.condition_set}') search_fields = ("id", "journal__name", "condition_set__id") list_display = ("id", "journal_name", "link_to_conditionset", "valid_from", "valid_until") list_filter = 
('condition_set__condition_type', XConditionValidListFilter, 'journal__publisher__name', ConditionSetListDynamicFilter) #list_filter = ('condition_set__condition_type', XConditionValidListFilter, # 'journal__publisher__name',) actions = (end_validity, start_validity, ) def journal_name(self, obj): return obj.journal.name def get_queryset(self, request): qs = super(JournalConditionAdmin, self).get_queryset(request) return qs.select_related('condition_set', 'journal') # unsuccessful attempt # def formfield_for_foreignkey(self, db_field, request, **kwargs): # if db_field.name == "journal": # kwargs["queryset"] = Journal.objects.filter(publisher__name__in=Publisher.objects.order_by().values('name').distinct()) # return super().formfield_for_foreignkey(db_field, request, **kwargs) @admin.register(Cost_factor) class Cost_factorAdmin(ImportExportModelAdmin): list_display = ("id", "comment", "amount", "symbol") list_filter = ('cost_factor_type', 'symbol') # textarea input is better for comments def get_form(self, request, obj=None, **kwargs): kwargs['widgets'] = {'comment': forms.Textarea} return super().get_form(request, obj, **kwargs) @admin.register(Cost_factor_type) class Cost_factor_typeAdmin(ImportExportModelAdmin): list_display = ("id", "name") diff --git a/django_api/apps.py b/django_api/apps.py index 9f0069bc..cd2b2e35 100644 --- a/django_api/apps.py +++ b/django_api/apps.py @@ -1,6 +1,23 @@ +""" +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. 
Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. + +""" + from django.apps import AppConfig class DjangoApiConfig(AppConfig): name = 'django_api' - verbose_name = 'OACCT back-end' + verbose_name = 'OACT back-end' diff --git a/django_api/models.py b/django_api/models.py index 4adfb638..4e653ccb 100644 --- a/django_api/models.py +++ b/django_api/models.py @@ -1,479 +1,496 @@ """ -Django object models for the django_api application of the OACCT project. +This is the Open Access Check Tool (OACT). 
+The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. + +""" + +""" +Django object models for the django_api application of the OACT project. 
Ref: database_model_20210421_MB.drawio 21.04.2021 """ from django.db import models from django.contrib.auth.models import User import datetime from django.utils.translation import gettext as _ class Country(models.Model): """ Countries: used as attributes by Publishers and Organizations :param name: full English name :type name: str, optional :param iso_code: ISO 3166-1 Alpha-3 code https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3#Officially_assigned_code_elements :type iso_code: str, optional """ name = models.CharField(verbose_name="Country name", max_length=120, null=True) iso_code = models.CharField(max_length=3, null=True) def __str__(self): return f"{self.name}" class Meta: verbose_name_plural = 'Countries' ordering = ('name',) class Language(models.Model): """ Languages: used as attributes by Journals :param name: full English name :type name: str, optional :param iso_code: ISO 639-2 code https://en.wikipedia.org/wiki/ISO_639-2 :type iso_code: str, optional """ name = models.CharField(verbose_name="Language name", max_length=120, null=True) iso_code = models.CharField(max_length=3, null=True) def __str__(self): return f"{self.name}" class Meta: ordering = ('name',) class Oa(models.Model): """ Open Access status: used as attribute by Journals :param status: short name, ideally one word i.e. Green, Gold, UNKNOWN... :type status: str, optional :param description: description text up to 1000 characters :type status: str, optional :param subscription: does a journal with this status require a subscription? :type subscription: bool :param accepted_manuscript: does a journal with this status generally allow to distribute the accepted manuscript? :type accepted_manuscript: bool :param apc: does a journal with this status require Article Processing Charges (APCs)? :type apc: bool :param final_version: does a journal with this status generally allow to distribute the published version? 
:type final_version: bool """ status = models.CharField(max_length=1000, null=True) description = models.CharField(max_length=1000, null=True) subscription = models.BooleanField(default=False) accepted_manuscript = models.BooleanField(default=False) apc = models.BooleanField(default=False) final_version = models.BooleanField(default=False) def __str__(self): return f"{self.status}" class Meta: ordering = ('-subscription',) verbose_name = "Open Access status" verbose_name_plural = "Open Access statuses" class Publisher(models.Model): """ Publishers: corporations or societies in charge of Journals :param name: name :type status: str, optional :param city: location of the main office :type city: str, optional :param state: if applicable, state or province :type state: str, optional :param country: home country or countries :type country: many-to-many relationship with the `Country` class :param starting_year: founding year :type starting_year: int, optional :param website: main web site :type website: URL :param oa_policies: web link to general Open Access policy if applicable :type oa_policies: URL """ name = models.CharField(verbose_name="Publisher name", max_length=1000, null=True) city = models.CharField(max_length=100, null=True) state = models.CharField(max_length=3, null=True) country = models.ManyToManyField("Country") starting_year = models.IntegerField(blank=True, null=True) website = models.URLField(max_length=1000) oa_policies = models.URLField(max_length=1000) def __str__(self): return f"{self.name}" class Meta: ordering = ('name',) class Issn(models.Model): """ Issns: a multiple property of Journals :param journal: Journal object to which the ISSN belongs :type journal: class `Journal`, optional :param issn: ISSN code such as 1234-5678 :type issn: str :param issn_type: Print, Electronic or Other :type issn_type: str """ PRINT = '1' ELECTRONIC = '2' OTHER = '3' TYPE_CHOICES = ( (PRINT, 'Print'), (ELECTRONIC, 'Electronic'), (OTHER, 'Other'), ) journal = 
models.ForeignKey("Journal", null=True, on_delete=models.CASCADE, related_name = "classIssn") #journal.classissn issn = models.CharField(max_length=9, null=False) """ISSN code such as 1234-5678 """ issn_type = models.CharField( choices=TYPE_CHOICES, max_length=10, blank=True ) def __str__(self): return f"{self.issn} ({dict(self.TYPE_CHOICES)[self.issn_type]})" class Meta: ordering = ('issn',) class Journal(models.Model): """ Journals: one of the big entities in the application :param name: journal title :type name: str :param name_short_iso_4: ISO 4 abbreviation of the title :type name_short_iso_4: str :param publisher: zero or more publishers in charge of the Journal :type publisher: many-to-many relationshio with class `Publisher` :param website: home page of the journal :type website: URL :param language: the journal publishes articles in these zero or more languages :type language: many-to-many relationship with class `Journal` :param oa_options: web page with the journal's Open Access conditions :type oa_options: URL, optional :param oa_status: Open Access status :type oa_status: reference to an `Oa` object :param starting_year: founding year :type starting_year: int, optional :param end_year: end year if applicable :type ending_year: int, optional :param doaj_seal: did the journal obtain the DOAJ Seal? https://doaj.org/apply/seal/ :type doaj_seal: bool :param doaj_status: is the journal accepted in the Directory of Open Access Journals? https://doaj.org :type doaj_status: bool :param lockss: is the journal archived by LOCKSS? https://www.lockss.org/about :type lockss: bool :param nlch: please remind me what this is supposed to be :type nlch: bool :param portico: did the journal obtain the DOAJ Seal? https://doaj.org/apply/seal/ :type portico: is the journal archived by Portico? 
https://www.portico.org/ :param qoam_av_score: Quality Open Access Marker (QOAM) score https://www.qoam.eu/ :type qoam_av_score: decimal number """ name = models.CharField(verbose_name="Journal name", max_length=800, blank=True, null=True) # search journal with name name_short_iso_4 = models.CharField(max_length=300, blank=True, null=True) publisher = models.ManyToManyField(Publisher) website = models.URLField(max_length=300, blank=True, null=True) language = models.ManyToManyField(Language) # 2021-08-11: only one-to-many relationship between Journal and ISSN # issn = models.ForeignKey("Issn", null=True, on_delete=models.CASCADE) oa_options = models.URLField(max_length=1000, blank=True, null=True) oa_status = models.ForeignKey("Oa", related_name ="oa_status", on_delete=models.CASCADE, null=True) starting_year = models.IntegerField(blank=True, null=True) end_year = models.IntegerField(blank=True, null=True) doaj_seal = models.BooleanField(default=False) doaj_status = models.BooleanField(default=False) lockss = models.BooleanField(default=False) nlch = models.BooleanField(default=False) portico = models.BooleanField(default=False) qoam_av_score = models.DecimalField(decimal_places=2, max_digits=5, blank=True, null=True) def __str__(self): return f"{self.name} from {self.website}" class Meta: ordering = ('name',) class Organization(models.Model): """ Organizations: one of the big entities in the application, organizations (research institutions or funders) who employ or fund the authors/researchers :param name: name of the organization :type name: str :param website: web site of the organization :type website: URL, optional :param country: zero or more home countries :type country: many-to-many relationship with class `Country` :param ror: Research Organization Registry (ROR) indentifier https://ror.org/ :type ror: str, optional :param fundref: Crossref Funder Registry identifier https://www.crossref.org/services/funder-registry/ :type fundref: str, optional :param 
starting_year: founding year :type starting_year: int, optional :param is_funder: if True, the organization is a funding agency, if False a research organization :type is_funder: bool :param ir_name: name of the oeganization's institutional repository for publications, if applicable :type ir_name: str, optional :param ir_url: address of the oeganization's institutional repository for publications, if applicable :type ir_name: URL, optional """ name = models.CharField(verbose_name="Organization name", max_length=600, null=True) website = models.URLField(max_length=600, blank=True, null=True) country = models.ManyToManyField("Country") ror = models.CharField(max_length=255, blank=True, null=True) fundref = models.CharField(max_length=255, blank=True, null=True) starting_year = models.IntegerField(blank=True, null=True) is_funder = models.BooleanField(default=False) ir_name = models.CharField(verbose_name="Institutional repository name", max_length=40, null=True, blank=True) ir_url = models.URLField(verbose_name="Institutional repository URL", max_length=100, null=True, blank=True) def __str__(self): return f"{self.name}" class Meta: ordering = ('name',) class Version(models.Model): """ Possible versions of an article during its life cycle: submitted version, accepted version, published version :param name: name of the version :type name: str """ description = models.CharField(max_length=300, null=False) def __str__(self): return f"{self.description}" class Licence(models.Model): """ Licenses that can or must be applied to an article version :param name_or_abbrev: name or abbreviation for the license: copyright, CC-BY,... 
:type name_or_abbrev: str :param website: web page that describes the license terms :type website: URL, optional """ name_or_abbrev = models.CharField(max_length=300, null=False) website = models.URLField(max_length=600, null=True, blank=True) class Meta: ordering = ('name_or_abbrev',) def __str__(self): return f"{self.name_or_abbrev}" class Cost_factor_type(models.Model): """ Cost factor types: amount, discount... :param name: name of the type :type name: str """ name = models.CharField(max_length=300, null=False) def __str__(self): return f"{self.name}" class Cost_factor(models.Model): """ Cost factors: financial terms applicable to use an Open Access option :param cost_factor_type: type of the cost factor :type ost_factor_type: reference to a `Cost_factor` object :param amount: actual cost or discount :type amount: int :param symbol: currency code or % :type symbol: str :param comment: extra information in free text :type comment: str, optionaé """ cost_factor_type = models.ForeignKey(Cost_factor_type, on_delete=models.CASCADE, blank=True, null=True) amount = models.IntegerField(null=False) symbol = models.CharField(max_length=10, null=False) comment = models.CharField(max_length=120, default="") class Meta: ordering = ('amount',) def __str__(self): return f"{self.id} - {self.amount} {self.symbol} - {self.comment}" class Term(models.Model): """ Terms: possible options to disseminate an article in Open Access :param version: zero or more versions for which the Term is applicable (currently only 1 is supported by the application) :type version: many-to-many relationship to the `Version` class :param cost_factor: zero or more possible cost factors :type cost_factor: many-to-many relationship to the `Cost_factor` class :param licence: zero or more possible licenses :type licence: many-to-many relationship to the `Licence` class :param embargo_months: duration of a possible embargo in months :type embargo_months: int :param ir_archiving: is archiving in an 
institutional repository allowed/required or not? :type ir_archiving: bool :param comment: extra information as free text :type comment: str, optional """ version = models.ManyToManyField(Version) cost_factor = models.ManyToManyField(Cost_factor) licence = models.ManyToManyField(Licence) embargo_months = models.IntegerField(blank=True, null=True) ir_archiving = models.BooleanField(default=False) comment = models.CharField(max_length=1000, null=True, blank=True) def __str__(self): try: # Maybe these fields should not allow NULL values? if self.embargo_months is None: embargo = 'no_' else: embargo = str(self.embargo_months) if self.comment is None: comment = '' else: comment = str(self.comment) term_data = (str(self.id), ';'.join([str(x) for x in self.version.all()]), ';'.join([str(x) for x in self.licence.all()]), ';'.join([str(x) for x in self.cost_factor.all()]), f'Archiving{str(self.ir_archiving)} {embargo}months', comment,) return ' - '.join(term_data) except RecursionError: # The JSON import in the admin module somehow throws a ValueError during the loading process # probably due to incomplete information in the many2many relationships # Then the error log apparently triggers a cascade of errors until # the RecursionError level is hit. Falling back to a basic __str__ # for the RecursionError seems to bypass the problem. return f"[Term.__str__() error] {self.id} - {self.comment}" class Meta: ordering = ('-ir_archiving', 'embargo_months', 'comment') class ConditionType(models.Model): """ Condition types: issued by a journal, by an organization, or agreement between both? 
:param condition_issuer: `organization-only`, `agreement` or `journal-only` :type condition_issuer: str """ condition_issuer = models.CharField(max_length=300, null=False) def __str__(self): return f"{self.condition_issuer}" class ConditionSubType(models.Model): """ Condition subtypes: in case we need to distinguish more finely than the 3 main condition types :param label: name of the subtype :type label_issuer: str """ label = models.CharField(max_length=300, null=False) def __str__(self): return f"{self.label}" @classmethod def get_default_pk(cls): """ An automatic subtype is attributed to any newly created `CondtionSet` object """ condition_subtype, created = cls.objects.get_or_create(label='Automatic') return condition_subtype.pk class ConditionSet(models.Model): """ Condition sets: collections of Open Access terms applicable to zero or more Journals and zero or more Organizations for some specific reason (policy document, agreement, contract...). :param condition_type: type for the condition set :type condition_type: reference to a `ConditionType` object :param subtype: subtype for the condition set :type subtype: eference to a `ConditionSubType` object :param organization: zero or more organisations to which the condition set is applicable :type organization: many-to-many relationship with the `Organization` class with `OrganizationCondition` objects as connectors :param journal: zero or more journals to which the condition set is applicable :type journal: many-to-many relationship with the `Journal` class with `JournalCondition` objects as connectors :param term: zero or more terms included in the condition set :type term: many-to-many relationship with the `Term` class :param source: web page with information about the condition set (origin, perimeter, etc.) 
:type source: URL, optional :param comment: description of the condition set as free text (will be used as a title in the frontend) :type comment: str, optional """ condition_type = models.ForeignKey(ConditionType, on_delete=models.CASCADE, blank=True, null=True) subtype = models.ForeignKey(ConditionSubType, on_delete=models.CASCADE, default=ConditionSubType.get_default_pk, null=True) organization = models.ManyToManyField( Organization, through='OrganizationCondition', through_fields=('condition_set', 'organization') ) journal = models.ManyToManyField( Journal, through='JournalCondition', through_fields=('condition_set', 'journal') ) term = models.ManyToManyField(Term) source = models.URLField(max_length=600, null=True, blank=True) comment = models.CharField(max_length=100, null=True, blank=True) def __str__(self): return f"{self.id} {self.condition_type}|{self.comment}" class Meta: # TODO does this work??? 2ndary sort showing institution first, funder second # ordering = ('condition_type__pk', 'organization__is_funder', 'subtype__id', 'comment') # No it does not, it duplicates most journal policies (one copy for funders, one for institutions) ordering = ('condition_type__pk', 'subtype__id', 'comment') class OrganizationCondition(models.Model): """ Organization-ConditionSet connector, linking `organization` with `condition`. The first (`valid_from`) and last (`valid_until`) known days of validity are recorded. 
""" organization = models.ForeignKey(Organization, on_delete=models.CASCADE, blank=True, null=True) condition_set = models.ForeignKey(ConditionSet, on_delete=models.CASCADE, blank=True, null=True) valid_from = models.DateField(blank=True, null=True) valid_until = models.DateField(blank=True, null=True) class Meta: verbose_name = "Organization/condition_set relationship" def __str__(self): return f"{self.id} {self.organization.name}/ConditionSet {self.condition_set.id}" class JournalCondition(models.Model): """ Journal-ConditionSet connector, linking `journal` with `condition`. The first (`valid_from`) and last (`valid_until`) known days of validity are recorded. """ journal = models.ForeignKey(Journal, on_delete=models.CASCADE, blank=True, null=True) condition_set = models.ForeignKey(ConditionSet, on_delete=models.CASCADE, blank=True, null=True) valid_from = models.DateField(blank=True, null=True) valid_until = models.DateField(blank=True, null=True) class Meta: verbose_name = "Journal/condition_set relationship" def __str__(self): return f"{self.id} {self.journal.name}/{self.condition_set}" diff --git a/django_api/serializers.py b/django_api/serializers.py index 880944f6..10605fd5 100644 --- a/django_api/serializers.py +++ b/django_api/serializers.py @@ -1,311 +1,328 @@ +""" +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. 
The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. + +""" + """ REST API serializers All serializers inherit from WritableNestedModelSerializer to allow writing nested objects through the API as per https://github.com/beda-software/drf-writable-nested and RQLMixin to support the Resource Query Language (RQL) https://django-rql.readthedocs.io/en/latest/ """ from rest_framework import serializers from dj_rql.drf.serializers import RQLMixin from .models import * from drf_writable_nested.serializers import WritableNestedModelSerializer class CountrySerializer(WritableNestedModelSerializer, RQLMixin): """ REST API serializer for Countries """ id = serializers.IntegerField(required=False) name = serializers.CharField(required=False) iso_code = serializers.CharField(required=False) class Meta: model = Country fields = '__all__' depth = 4 class LanguageSerializer(WritableNestedModelSerializer, RQLMixin): """ REST API serializer for Languages """ id = serializers.IntegerField(required=False) name = serializers.CharField(required=False) 
iso_code = serializers.CharField(required=False) class Meta: model = Language fields = '__all__' depth = 4 class PublisherSerializer(WritableNestedModelSerializer, RQLMixin): """ REST API serializer for Publishers """ id = serializers.IntegerField(required=False) country = CountrySerializer(required=False, many=True) class Meta: model = Publisher fields = '__all__' depth = 4 class OaSerializer(WritableNestedModelSerializer, RQLMixin): """ REST API serializer for OA statuses """ id = serializers.IntegerField(required=False, allow_null=True) description = serializers.CharField(required=False, allow_null=True) subscription = serializers.BooleanField(required=False) accepted_manuscript = serializers.BooleanField(required=False) apc = serializers.BooleanField(required=False) final_version = serializers.BooleanField(required=False) class Meta: model = Oa fields = '__all__' depth = 4 class IssnSerializer(WritableNestedModelSerializer, RQLMixin): """ REST API serializer for ISSNs """ id = serializers.IntegerField(required=False) class Meta: model = Issn fields = '__all__' depth = 1 class JournalSerializer(WritableNestedModelSerializer, RQLMixin): """ REST API serializer for Journals """ id = serializers.IntegerField(required=False) issn = IssnSerializer(required=False, source='classIssn', many=True) publisher = PublisherSerializer(required=False, many=True) language = LanguageSerializer(required=False, many=True) # allow update via post request --> "oa_status": {2}, # oa_status = serializers.PrimaryKeyRelatedField(queryset=Oa.objects.all()) oa_status = OaSerializer(required=False,allow_null=True) class Meta: model = Journal fields = '__all__' depth = 4 class LicenceSerializer(WritableNestedModelSerializer, RQLMixin): """ REST API serializer for Licences """ id = serializers.IntegerField(required=False) name_or_abbrev = serializers.CharField() website = serializers.URLField(allow_null=True, required=False) class Meta: model = Licence fields = '__all__' depth = 4 class 
Cost_factor_typeSerializer(WritableNestedModelSerializer, RQLMixin): """ REST API serializer for cost factor types """ id = serializers.IntegerField(required=False) name = serializers.CharField() class Meta: model = Cost_factor_type fields = '__all__' depth = 4 class VersionSerializer(WritableNestedModelSerializer, RQLMixin): """ REST API serializer for article versions """ id = serializers.IntegerField(required=False) description = models.CharField() class Meta: model = Version fields = '__all__' depth = 4 class OrgaSerializer(WritableNestedModelSerializer, RQLMixin): """ REST API serializer for organizations """ id = serializers.IntegerField(required=False) country = CountrySerializer(required=False, many=True) class Meta: model = Organization fields = '__all__' depth = 4 class Cost_factorSerializer(WritableNestedModelSerializer, RQLMixin): """ REST API serializer for cost factors """ id = serializers.IntegerField(required=False) cost_factor_type = Cost_factor_typeSerializer(required=False, allow_null=True) amount = serializers.IntegerField() symbol = serializers.CharField() comment = serializers.CharField(required=False) class Meta: model = Cost_factor fields = '__all__' depth = 4 class TermSerializer(WritableNestedModelSerializer, RQLMixin): """ REST API serializer for terms """ id = serializers.IntegerField(required=False) version = VersionSerializer(required=False, many=True) cost_factor = Cost_factorSerializer(required=False, many=True) licence = LicenceSerializer(required=False, many=True) class Meta: model = Term fields = '__all__' depth = 4 class ConditionTypeSerializer(WritableNestedModelSerializer, RQLMixin): """ REST API serializer for condition types """ id = serializers.IntegerField(required=False) condition_issuer = serializers.CharField() class Meta: model = ConditionType fields = '__all__' depth = 4 class ConditionSubTypeSerializer(WritableNestedModelSerializer, RQLMixin): """ REST API serializer for condition subtypes """ id = 
serializers.IntegerField(required=False) label = serializers.CharField() class Meta: model = ConditionSubType fields = '__all__' depth = 4 class ConditionSetSerializer(WritableNestedModelSerializer, RQLMixin): """ REST API serializer for condition sets """ id = serializers.IntegerField(required=False) term = TermSerializer(many=True, read_only=False) condition_type = ConditionTypeSerializer(read_only=False) subtype = ConditionSubTypeSerializer(read_only=False) organization = OrgaSerializer(many=True, read_only=False) journal = JournalSerializer(many=True, read_only=False) comment = serializers.CharField(read_only=False) source = serializers.URLField(read_only=False) class Meta: model = ConditionSet # pre filter for rql # fields = ['id','condition_type','term','journal','organization'] # add for informations purpose fields = '__all__' depth = 4 class JournalIdSerializer(WritableNestedModelSerializer, RQLMixin): """ REST API light-weight serializer for journals, using only the ID. Used by the frontend when building the query """ id = serializers.IntegerField(required=False) # allow update via post request --> "oa_status": {2}, class Meta: model = Journal fields = ['id'] class ConditionSetLightSerializer(WritableNestedModelSerializer, RQLMixin): """ REST API serializer for condition sets, providing only the information needed in the frontend to improve performance """ id = serializers.IntegerField(required=False) term = TermSerializer(many=True, read_only=False) condition_type = ConditionTypeSerializer(read_only=False) subtype = ConditionSubTypeSerializer(read_only=False) organization = OrgaSerializer(many=True, read_only=False) # No journals in this one. 
journal = JournalIdSerializer(many=True, read_only=False) comment = serializers.CharField(read_only=False) source = serializers.URLField(read_only=False) class Meta: model = ConditionSet # pre filter for rql # fields = ['id','condition_type','term','journal','organization'] # add for informations purpose fields = ['id', 'condition_type', 'subtype', 'term', 'organization', 'journal', 'comment', 'source'] depth = 4 class JournalLightSerializer(WritableNestedModelSerializer, RQLMixin): """ REST API lighter serializer for journals """ id = serializers.IntegerField(required=False) # allow update via post request --> "oa_status": {2}, oa_status = serializers.PrimaryKeyRelatedField(queryset=Oa.objects.all()) language = serializers.PrimaryKeyRelatedField(queryset=Language.objects.all(), many=True) publisher = serializers.PrimaryKeyRelatedField(queryset=Publisher.objects.all(), many=True) starting_year = serializers.IntegerField(required=False) end_year = serializers.IntegerField(required=False) class Meta: model = Journal fields = ['id', 'name', 'oa_status', 'language', 'publisher', 'starting_year', 'end_year'] depth = 1 class OaSerializer(WritableNestedModelSerializer,RQLMixin): """ REST API serializers for OA statuses """ id = serializers.IntegerField(required=False) status = serializers.CharField(allow_null=True) description = serializers.CharField(allow_null=True) subscription = serializers.BooleanField(required=False) accepted_manuscript = serializers.BooleanField(required=False) apc = serializers.BooleanField(required=False) final_version = serializers.BooleanField(required=False) class Meta: model = Oa fields = '__all__' depth = 4 class OrganizationConditionSerializer(serializers.ModelSerializer, RQLMixin): """ REST API serializers for Organisation-condition connections """ id = serializers.IntegerField(required=False) organization = OrgaSerializer(required=False) condition_set = ConditionSetSerializer(required=False) class Meta: model = OrganizationCondition fields 
= '__all__' depth = 4 class JournalConditionSerializer(serializers.ModelSerializer, RQLMixin): """ REST API serializers for Organisation-condition connections """ id = serializers.IntegerField(required=False) journal = JournalSerializer(required=False) condition_set = ConditionSetSerializer(required=False) class Meta: model = JournalCondition fields = '__all__' depth = 4 diff --git a/django_api/tests.py b/django_api/tests.py index 7ce503c2..f8f7dc4c 100644 --- a/django_api/tests.py +++ b/django_api/tests.py @@ -1,3 +1,20 @@ +""" +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. + +""" + from django.test import TestCase # Create your tests here. diff --git a/django_api/urls.py b/django_api/urls.py index 2c99628e..996883b6 100644 --- a/django_api/urls.py +++ b/django_api/urls.py @@ -1,39 +1,56 @@ +""" +This is the Open Access Check Tool (OACT). +The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. 
+ +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. + +""" + from django.urls import path, re_path, include from django.conf.urls.static import static from django.conf import settings from .views import * from rest_framework import routers from rest_framework.schemas import get_schema_view router = routers.DefaultRouter() router.register(r'journal', JournalViewSet) router.register(r'journal_light', JournalLightViewSet) router.register(r'organization', OrgaViewSet) router.register(r'funder', FunderViewSet) router.register(r'conditionset', ConditionSetViewSet) router.register(r'conditionset_light', ConditionSetLightViewSet) router.register(r'term', TermViewSet) # show table details in the API router.register(r'country', CountryViewSet) router.register(r'language', LanguageViewSet) router.register(r'issn', IssnViewSet) router.register(r'oa', OaViewSet) router.register(r'publisher', PublisherViewSet) router.register(r'version', VersionViewSet) router.register(r'licence', LicenceViewSet) router.register(r'cost_factor_type', Cost_factor_typeViewSet) router.register(r'cost_factor', Cost_factorViewSet) router.register(r'conditiontype', ConditionTypeViewSet) router.register(r'JournalCondition', JournalConditionViewSet) router.register(r'organizationCondition', OrganizationConditionViewSet) urlpatterns = [ path('', include(router.urls)), path('openapi', get_schema_view( - title="OACCT API", - description="API of the Open Access Compliance Check Tool (OACCT)", - version ="0.9" + title="OACT API", + description="API of the Open Access Check Tool (OACT)", + version ="1.0" ), name='openapi-schema'), ] diff --git a/django_api/views.py b/django_api/views.py index d43d690f..fe1c38b3 100644 --- a/django_api/views.py +++ b/django_api/views.py @@ -1,304 +1,321 @@ +""" +This is the Open Access Check Tool (OACT). 
+The publication of scientific articles as Open Access (OA), usually in the variants "Green OA" and "Gold OA", allows free access to scientific research results and their largely unhindered dissemination. Often, however, the multitude of available publication conditions makes the decision in favor of a particular journal difficult: requirements of the funding agencies and publication guidelines of the universities and colleges must be carefully compared with the offers of the publishing houses, and separately concluded publication agreements can also offer additional benefits. The "OA Compliance Check Tool" provides a comprehensive overview of the possible publication conditions for a large number of journals, especially for the Swiss university landscape, and thus supports the decision-making process. + +© All rights reserved. ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland, Scientific Information and Libraries, 2022 + +See LICENSE.TXT for more details. + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. + +You should have received a copy of the GNU Affero General Public License along with this program. If not, see +. 
+ +""" + from django.contrib.auth.models import AbstractUser from django.shortcuts import render from django.contrib.auth import authenticate, login, logout from django.shortcuts import render from django.http import HttpResponse, HttpResponseRedirect, Http404, JsonResponse from .models import * from .serializers import * from rest_framework import viewsets, filters, generics from django.utils.decorators import method_decorator from django.views.decorators.cache import cache_page from rest_framework.authentication import BasicAuthentication from rest_framework.permissions import IsAuthenticatedOrReadOnly from rest_framework import status from rest_framework.decorators import api_view from rest_framework.response import Response from rest_framework_tracking.mixins import LoggingMixin from itertools import chain from django.db.models import Count, Max from dj_rql.filter_cls import RQLFilterClass from urllib.parse import unquote from datetime import date import ipaddress class JournalViewSet(viewsets.ModelViewSet): authentification_classes = (BasicAuthentication,) permission_classes = [IsAuthenticatedOrReadOnly] search_fields = ['name'] filter_backends = (filters.SearchFilter,) queryset = Journal.objects.all() serializer_class = JournalSerializer class JournalLightViewSet(viewsets.ModelViewSet): authentification_classes = (BasicAuthentication,) permission_classes = [IsAuthenticatedOrReadOnly] search_fields = ['name'] filter_backends = (filters.SearchFilter,) queryset = Journal.objects.all().prefetch_related('publisher', 'language', 'oa_status') serializer_class = JournalLightSerializer @method_decorator(cache_page(4 * 60 * 60)) def dispatch(self, request, *args, **kwargs): # print('dispatch() called') return super().dispatch(request, *args, **kwargs) @method_decorator(cache_page(4 * 60 * 60)) def list(self, request): # print('list() called') serializer = self.serializer_class(self.queryset, many=True) return Response(serializer.data) class 
OrgaViewSet(viewsets.ModelViewSet): authentification_classes = (BasicAuthentication,) permission_classes = [IsAuthenticatedOrReadOnly] serializer_class = OrgaSerializer queryset = Organization.objects.filter( is_funder=False ) class ConditionSetFilters(RQLFilterClass): """ API filters for the essential query on ConditionSets Arguments can include a journal id, zero to two organization ids, validity dates and a condition type. Request examples: http://127.0.0.1:8000/api/conditionset/?and(eq(journalcondition.journal.id,3),eq(organizationcondition.organization.id,11),eq(condition_type.id,1)) http://127.0.0.1:8000/api/conditionset/?and(eq(journalcondition.journal.id,14),ne(condition_type.id,2),ge(journalcondition.valid_until,2021-08-20),le(journalcondition.valid_from,2021-08-20),ge(organizationcondition.valid_until,2021-08-20),le(organizationcondition.valid_from,2021-08-20)) """ MODEL = ConditionSet DISTINCT = True FILTERS = ( 'id', { 'namespace': 'journalcondition', 'filters': ['id', 'valid_from', 'valid_until', { 'namespace': 'journal', 'filters': ['id', ], } ], }, { 'namespace': 'organizationcondition', 'filters': ['id', 'valid_from', 'valid_until', { 'namespace': 'organization', 'filters': ['id', ] } ], }, { 'namespace': 'condition_type', 'filters': ['id', ], }, ) class MyLoggingMixin(LoggingMixin): """ Supercharge drf_tracking.LoggingMixin to get the real IP address in the OpenShift infrastructure """ def _get_ip_address(self, request): """Get the remote ip address the request was generated from.""" - print(request.META) - ipaddr = request.META.get("HTTP_X_FORWARDED_FOR", None) + # print(request.META) + ipaddr = request.META.get("X-Real-IP", None) if ipaddr: ipaddr = ipaddr.split(",")[0] else: - ipaddr = request.META.get("HTTP_X_REAL_IP", None) + ipaddr = request.META.get("HTTP_X_FORWARDED_FOR", None) if ipaddr: ipaddr = ipaddr.split(",")[0] else: ipaddr = request.META.get("REMOTE_ADDR", "").split(",")[0] # Account for IPv4 and IPv6 addresses, each possibly with 
port appended. Possibilities are: # # # :port # []:port # Note that ipv6 addresses are colon separated hex numbers possibles = (ipaddr.lstrip("[").split("]")[0], ipaddr.split(":")[0]) for addr in possibles: try: return str(ipaddress.ip_address(addr)) except ValueError: pass return ipaddr class ConditionSetViewSet(MyLoggingMixin, viewsets.ModelViewSet): """ ViewSet for ConditionSets The QuerySet obtained from the database is annotated to obtain the desired sorting order, i.e. by condition_type, then subtype, then a calculated score so that institutions receive more attention than funders within a given type/subtype """ authentification_classes = (BasicAuthentication,) permission_classes = [IsAuthenticatedOrReadOnly] queryset = ConditionSet.objects.all().annotate(include_funder=Max('organization__is_funder')).order_by('condition_type','subtype', 'include_funder','comment') # queryset = ConditionSet.objects.values('term__version__description') serializer_class = ConditionSetSerializer # serializer_class = ConditionGroupedSerializer rql_filter_class = ConditionSetFilters #.objects.values('term__version.description') class ConditionSetLightViewSet(MyLoggingMixin, viewsets.ModelViewSet): """ Light-weight ViewSet for ConditionSets The QuerySet obtained from the database is annotated to obtain the desired sorting order, i.e. 
by condition_type, then subtype, then a calculated score so that institutions receive more attention than funders within a given type/subtype """ authentification_classes = (BasicAuthentication,) permission_classes = [IsAuthenticatedOrReadOnly] queryset = ConditionSet.objects.all().annotate(include_funder=Max('organization__is_funder')).order_by('condition_type','subtype','include_funder','comment') serializer_class = ConditionSetLightSerializer rql_filter_class = ConditionSetFilters class FunderViewSet(viewsets.ModelViewSet): authentification_classes = (BasicAuthentication,) permission_classes = [IsAuthenticatedOrReadOnly] serializer_class = OrgaSerializer queryset = Organization.objects.filter( is_funder=True ) class TermViewSet(viewsets.ModelViewSet): authentification_classes = (BasicAuthentication,) permission_classes = [IsAuthenticatedOrReadOnly] serializer_class = TermSerializer queryset = Term.objects.all() class CountryViewSet(viewsets.ModelViewSet): authentification_classes = (BasicAuthentication,) permission_classes = [IsAuthenticatedOrReadOnly] serializer_class = CountrySerializer queryset = Country.objects.all() class LanguageViewSet(viewsets.ModelViewSet): authentification_classes = (BasicAuthentication,) permission_classes = [IsAuthenticatedOrReadOnly] serializer_class = LanguageSerializer queryset = Language.objects.all() class IssnViewSet(viewsets.ModelViewSet): authentification_classes = (BasicAuthentication,) permission_classes = [IsAuthenticatedOrReadOnly] serializer_class = IssnSerializer queryset = Issn.objects.all() class OaViewSet(viewsets.ModelViewSet): authentification_classes = (BasicAuthentication,) permission_classes = [IsAuthenticatedOrReadOnly] serializer_class = OaSerializer queryset = Oa.objects.all() class PublisherViewSet(viewsets.ModelViewSet): authentification_classes = (BasicAuthentication,) permission_classes = [IsAuthenticatedOrReadOnly] serializer_class = PublisherSerializer queryset = Publisher.objects.all() class 
VersionViewSet(viewsets.ModelViewSet): authentification_classes = (BasicAuthentication,) permission_classes = [IsAuthenticatedOrReadOnly] serializer_class = VersionSerializer queryset = Version.objects.all() class LicenceViewSet(viewsets.ModelViewSet): authentification_classes = (BasicAuthentication,) permission_classes = [IsAuthenticatedOrReadOnly] serializer_class = LicenceSerializer queryset = Licence.objects.all() class Cost_factor_typeViewSet(viewsets.ModelViewSet): authentification_classes = (BasicAuthentication,) permission_classes = [IsAuthenticatedOrReadOnly] serializer_class = Cost_factor_typeSerializer queryset = Cost_factor_type.objects.all() class Cost_factorViewSet(viewsets.ModelViewSet): authentification_classes = (BasicAuthentication,) permission_classes = [IsAuthenticatedOrReadOnly] serializer_class = Cost_factorSerializer queryset = Cost_factor.objects.all() class ConditionTypeViewSet(viewsets.ModelViewSet): authentification_classes = (BasicAuthentication,) permission_classes = [IsAuthenticatedOrReadOnly] serializer_class = ConditionTypeSerializer queryset = ConditionType.objects.all() class OrganizationConditionViewSet(viewsets.ModelViewSet): authentification_classes = (BasicAuthentication,) permission_classes = [IsAuthenticatedOrReadOnly] serializer_class = OrganizationConditionSerializer queryset = OrganizationCondition.objects.all() class JournalConditionViewSet(viewsets.ModelViewSet): authentification_classes = (BasicAuthentication,) permission_classes = [IsAuthenticatedOrReadOnly] serializer_class = JournalConditionSerializer queryset = JournalCondition.objects.all() class OrganizationConditionViewSet(viewsets.ModelViewSet): authentification_classes = (BasicAuthentication,) permission_classes = [IsAuthenticatedOrReadOnly] serializer_class = OrganizationConditionSerializer queryset = OrganizationCondition.objects.all() # Count number of different version # 
OrganizationCondition.objects.annotate(version_count=Count('condition_set__term__version')) # OrganizationCondition.objects # .values('condition_set__term__version') #what to group by # .annotate(version_count=Count('condition_set__term__version')) # what to aggregate # group by version and count # OrganizationCondition.objects.values('condition_set__term__version').annotate(version_count=Count('condition_set__term__version')) # source https://hakibenita.com/django-group-by-sql # https://docs.djangoproject.com/en/3.2/topics/db/aggregation/ # OrganizationCondition.objects.values('condition_set__term__version').filter(organization_id=1).annotate(version_count=Count('condition_set__term__version')) \ No newline at end of file diff --git a/django_app/settings.py b/django_app/settings.py index 86c246da..a24ffdc0 100644 --- a/django_app/settings.py +++ b/django_app/settings.py @@ -1,176 +1,182 @@ """ Django settings for django_api project. Generated by 'django-admin startproject' using Django 3.1.3. For more information on this file, see https://docs.djangoproject.com/en/3.1/topics/settings/ For the full list of settings and their values, see https://docs.djangoproject.com/en/3.1/ref/settings/ """ import os # Build paths inside the project like this: BASE_DIR / 'subdir'. BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/3.1/howto/deployment/checklist/ # SECURITY WARNING: keep the secret key used in production secret! SECRET_KEY = 'SECRET_KEY' # SECURITY WARNING: don't run with debug turned on in production! DEBUG = True +# In case something goes wrong while trying to run with DEBUG = False +DEBUG_PROPAGATE_EXCEPTIONS = True + ALLOWED_HOSTS = ['0.0.0.0', '127.0.0.1', ] INTERNAL_IPS = [ # ... '127.0.0.1', # ... 
] # Application definition INSTALLED_APPS = [ 'django.contrib.admin', 'django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', 'import_export', 'django_api', 'corsheaders', 'rest_framework', 'rql_filter', 'django_extensions', - 'debug_toolbar', + #'debug_toolbar', 'rest_framework_tracking', # Not necessary at this point but let's keep it as a possible idea # 'inline_actions', ] MIDDLEWARE = [ #'debug_toolbar.middleware.DebugToolbarMiddleware', 'django.middleware.security.SecurityMiddleware', 'whitenoise.middleware.WhiteNoiseMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', 'corsheaders.middleware.CorsMiddleware', 'django.middleware.common.CommonMiddleware', 'django.middleware.csrf.CsrfViewMiddleware', 'django.contrib.auth.middleware.AuthenticationMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', 'django.middleware.clickjacking.XFrameOptionsMiddleware', ] ROOT_URLCONF = 'django_app.urls' CORS_ORIGIN_ALLOW_ALL = False TEMPLATES = [ { 'BACKEND': 'django.template.backends.django.DjangoTemplates', 'DIRS': [ os.path.join(BASE_DIR, 'templates'), os.path.join(BASE_DIR, 'staticfiles') ], 'APP_DIRS': True, 'OPTIONS': { 'context_processors': [ 'django.template.context_processors.debug', 'django.template.context_processors.request', 'django.contrib.auth.context_processors.auth', 'django.contrib.messages.context_processors.messages', ], }, }, ] TEMPLATE_LOADERS = ( ('django.template.loaders.cached.Loader', ( 'django.template.loaders.filesystem.Loader', 'django.template.loaders.app_directories.Loader', )), ) WSGI_APPLICATION = 'django_app.wsgi.application' # Database # https://docs.djangoproject.com/en/3.1/ref/settings/#databases DATABASES = { 'default': { 'ENGINE': 'django.db.backends.sqlite3', 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), } } # Password validation # https://docs.djangoproject.com/en/3.1/ref/settings/#auth-password-validators 
AUTH_PASSWORD_VALIDATORS = [ { 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', }, { 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', }, { 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', }, { 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', }, ] # Internationalization # https://docs.djangoproject.com/en/3.1/topics/i18n/ LANGUAGE_CODE = 'en-us' TIME_ZONE = 'Europe/Zurich' USE_I18N = True USE_L10N = True USE_TZ = True # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/3.1/howto/static-files/ STATIC_URL = '/static/' STATICFILES_DIRS = [ os.path.join(BASE_DIR, 'static'), ] # static file for production use only STATIC_ROOT = os.path.join(BASE_DIR, 'staticfiles') STATICFILES_STORAGE = 'whitenoise.storage.CompressedManifestStaticFilesStorage' REST_FRAMEWORK = { 'DEFAULT_FILTER_BACKENDS': ['dj_rql.drf.RQLFilterBackend'] } # will this be enough to set 2 validity dates for all journals? 
DATA_UPLOAD_MAX_NUMBER_FIELDS = 40000 # Provide Django 3.2 with a safe default DEFAULT_AUTO_FIELD = 'django.db.models.AutoField' # drf-tracking should not expose editable log entries -DRF_TRACKING_ADMIN_LOG_READONLY=True +DRF_TRACKING_ADMIN_LOG_READONLY = True + +SHELL_PLUS_PRINT_SQL_TRUNCATE = None +RUNSERVER_PLUS_PRINT_SQL_TRUNCATE = None diff --git a/import_scripts/01_oacct_countries.md b/import_scripts/01_oacct_countries.md deleted file mode 100644 index 1aa14a4e..00000000 --- a/import_scripts/01_oacct_countries.md +++ /dev/null @@ -1,587 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. - -Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -Date de dernière mise à jour : 16.07.2021 - - -```python -import pandas as pd -import csv -import json -import numpy as np -``` - -## Table Countries - - -```python -# La table a été corrigée pour ajouter la valeur manquante à la fin : -# International Agency International Agency OI INT 999 -country = pd.read_csv('iso_3166.txt', encoding='utf-8', header=0, sep='\t', na_filter=False) -country -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
English short nameFrench short nameAlpha-2 codeAlpha-3 codeNumeric
0AfghanistanAfghanistan (l')AFAFG4
1AlbaniaAlbanie (l')ALALB8
2AlgeriaAlgérie (l')DZDZA12
3American SamoaSamoa américaines (les)ASASM16
4AndorraAndorre (l')ADAND20
..................
245YemenYémen (le)YEYEM887
246ZambiaZambie (la)ZMZMB894
247ZimbabweZimbabwe (le)ZWZWE716
248Åland IslandsÅland(les Îles)AXALA248
249International AgencyInternational AgencyOIINT999
-

250 rows × 5 columns

-
- - - - -```python -country.loc[country['Alpha-2 code'].isnull()] -``` - - - - -
- - - - - - - - - - - - - - -
English short nameFrench short nameAlpha-2 codeAlpha-3 codeNumeric
-
- - - - -```python -# convertir l'index en id -country = country.reset_index() -country -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
indexEnglish short nameFrench short nameAlpha-2 codeAlpha-3 codeNumeric
00AfghanistanAfghanistan (l')AFAFG4
11AlbaniaAlbanie (l')ALALB8
22AlgeriaAlgérie (l')DZDZA12
33American SamoaSamoa américaines (les)ASASM16
44AndorraAndorre (l')ADAND20
.....................
245245YemenYémen (le)YEYEM887
246246ZambiaZambie (la)ZMZMB894
247247ZimbabweZimbabwe (le)ZWZWE716
248248Åland IslandsÅland(les Îles)AXALA248
249249International AgencyInternational AgencyOIINT999
-

250 rows × 6 columns

-
- - - - -```python -country['id'] = country['index'] + 1 -del country['index'] -del country['French short name'] -del country['Alpha-3 code'] -del country['Numeric'] -country -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
English short nameAlpha-2 codeid
0AfghanistanAF1
1AlbaniaAL2
2AlgeriaDZ3
3American SamoaAS4
4AndorraAD5
............
245YemenYE246
246ZambiaZM247
247ZimbabweZW248
248Åland IslandsAX249
249International AgencyOI250
-

250 rows × 3 columns

-
- - - - -```python -# renommer les colonnes -country = country.rename(columns={'Alpha-2 code' : 'iso_code', 'English short name' : 'name'}) -``` - - -```python -# ajout de la valeur UNKNOWN -country = country.append({'id' : 999999, 'iso_code' : '__', 'name' : 'UNKNOWN'}, ignore_index=True) -``` - - -```python -country -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nameiso_codeid
0AfghanistanAF1
1AlbaniaAL2
2AlgeriaDZ3
3American SamoaAS4
4AndorraAD5
............
246ZambiaZM247
247ZimbabweZW248
248Åland IslandsAX249
249International AgencyOI250
250UNKNOWN__999999
-

251 rows × 3 columns

-
- - - - -```python -# esport JSON -result = country.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/country.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export csv -country.to_csv('sample/country.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export csv -country.to_csv('country.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -country.to_excel('sample/country.xlsx', index=False) -``` diff --git a/import_scripts/01_oacct_countries.py b/import_scripts/01_oacct_countries.py deleted file mode 100644 index 5f4ff631..00000000 --- a/import_scripts/01_oacct_countries.py +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. 
-# -# Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -# Date de dernière mise à jour : 16.07.2021 - -# In[1]: - - -import pandas as pd -import csv -import json -import numpy as np - - -# ## Table Countries - -# In[2]: - - -# La table a été corrigée pour ajouter la valeur manquante à la fin : -# International Agency International Agency OI INT 999 -country = pd.read_csv('iso_3166.txt', encoding='utf-8', header=0, sep='\t', na_filter=False) -country - - -# In[3]: - - -country.loc[country['Alpha-2 code'].isnull()] - - -# In[4]: - - -# convertir l'index en id -country = country.reset_index() -country - - -# In[5]: - - -country['id'] = country['index'] + 1 -del country['index'] -del country['French short name'] -del country['Alpha-3 code'] -del country['Numeric'] -country - - -# In[6]: - - -# renommer les colonnes -country = country.rename(columns={'Alpha-2 code' : 'iso_code', 'English short name' : 'name'}) - - -# In[7]: - - -# ajout de la valeur UNKNOWN -country = country.append({'id' : 999999, 'iso_code' : '__', 'name' : 'UNKNOWN'}, ignore_index=True) - - -# In[8]: - - -country - - -# In[9]: - - -# esport JSON -result = country.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/country.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[10]: - - -# export csv -country.to_csv('sample/country.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[11]: - - -# export csv -country.to_csv('country.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[12]: - - -# export excel -country.to_excel('sample/country.xlsx', index=False) - diff --git a/import_scripts/02_oacct_languages.md b/import_scripts/02_oacct_languages.md deleted file mode 100644 index efcffbdd..00000000 --- a/import_scripts/02_oacct_languages.md +++ /dev/null @@ -1,694 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration 
avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. - -Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -Date de dernière mise à jour : 16.07.2021 - - -```python -import pandas as pd -import csv -import json -import numpy as np -``` - -## Table Language - - -```python -# https://www.loc.gov/standards/iso639-2/php/code_list.php -# ISO 639-2 Code ISO 639-1 Code English name of Language French name of Language German name of Language -language = pd.read_csv('ISO-639-2_utf-8.txt', encoding='utf-8', header=None, sep='|', na_filter=False, names=['ISO 639-2 Code', 'ISO 639-1 Code', 'ignore', 'English name of Language', 'French name of Language'], index_col=False) -language -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ISO 639-2 CodeISO 639-1 CodeignoreEnglish name of LanguageFrench name of Language
0aaraaAfarafar
1abkabAbkhazianabkhaze
2aceAchineseaceh
3achAcoliacoli
4adaAdangmeadangme
..................
482zndZande languageszandé, langues
483zulzuZuluzoulou
484zunZunizuni
485zxxNo linguistic content; Not applicablepas de contenu linguistique; non applicable
486zzaZaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazakizaza; dimili; dimli; kirdki; kirmanjki; zazaki
-

487 rows × 5 columns

-
- - - - -```python -language.loc[language['ISO 639-2 Code'].isnull()] -``` - - - - -
- - - - - - - - - - - - - - -
ISO 639-2 CodeISO 639-1 CodeignoreEnglish name of LanguageFrench name of Language
-
- - - - -```python -# convertir l'index en id -language = language.reset_index() -language -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
indexISO 639-2 CodeISO 639-1 CodeignoreEnglish name of LanguageFrench name of Language
00aaraaAfarafar
11abkabAbkhazianabkhaze
22aceAchineseaceh
33achAcoliacoli
44adaAdangmeadangme
.....................
482482zndZande languageszandé, langues
483483zulzuZuluzoulou
484484zunZunizuni
485485zxxNo linguistic content; Not applicablepas de contenu linguistique; non applicable
486486zzaZaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazakizaza; dimili; dimli; kirdki; kirmanjki; zazaki
-

487 rows × 6 columns

-
- - - - -```python -language['id'] = language['index'] + 1 -del language['index'] -del language['ignore'] -del language['French name of Language'] -del language['ISO 639-1 Code'] -language -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ISO 639-2 CodeEnglish name of Languageid
0aarAfar1
1abkAbkhazian2
2aceAchinese3
3achAcoli4
4adaAdangme5
............
482zndZande languages483
483zulZulu484
484zunZuni485
485zxxNo linguistic content; Not applicable486
486zzaZaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki487
-

487 rows × 3 columns

-
- - - - -```python -# renommer les colonnes -language = language.rename(columns={'ISO 639-2 Code' : 'iso_code', 'English name of Language' : 'name'}) -``` - - -```python -language -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
iso_codenameid
0aarAfar1
1abkAbkhazian2
2aceAchinese3
3achAcoli4
4adaAdangme5
............
482zndZande languages483
483zulZulu484
484zunZuni485
485zxxNo linguistic content; Not applicable486
486zzaZaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki487
-

487 rows × 3 columns

-
- - - - -```python -# corriger la valeur trop longue qaa-qtz -language.loc[language['iso_code'] == 'qaa-qtz', 'iso_code'] = 'qaa' -``` - - -```python -# ajout de la valeur UNKNOWN -language = language.append({'id' : 999999, 'iso_code' : '___', 'name' : 'UNKNOWN'}, ignore_index=True) -language -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
iso_codenameid
0aarAfar1
1abkAbkhazian2
2aceAchinese3
3achAcoli4
4adaAdangme5
............
483zulZulu484
484zunZuni485
485zxxNo linguistic content; Not applicable486
486zzaZaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki487
487___UNKNOWN999999
-

488 rows × 3 columns

-
- - - - -```python -# esport JSON -result = language.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/language.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export csv -language.to_csv('language.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export csv -language.to_csv('sample/language.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -language.to_excel('sample/language.xlsx', index=False) -``` diff --git a/import_scripts/02_oacct_languages.py b/import_scripts/02_oacct_languages.py deleted file mode 100644 index 7f859fdc..00000000 --- a/import_scripts/02_oacct_languages.py +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. 
-# -# Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -# Date de dernière mise à jour : 16.07.2021 - -# In[1]: - - -import pandas as pd -import csv -import json -import numpy as np - - -# ## Table Language - -# In[2]: - - -# https://www.loc.gov/standards/iso639-2/php/code_list.php -# ISO 639-2 Code ISO 639-1 Code English name of Language French name of Language German name of Language -language = pd.read_csv('ISO-639-2_utf-8.txt', encoding='utf-8', header=None, sep='|', na_filter=False, names=['ISO 639-2 Code', 'ISO 639-1 Code', 'ignore', 'English name of Language', 'French name of Language'], index_col=False) -language - - -# In[3]: - - -language.loc[language['ISO 639-2 Code'].isnull()] - - -# In[4]: - - -# convertir l'index en id -language = language.reset_index() -language - - -# In[5]: - - -language['id'] = language['index'] + 1 -del language['index'] -del language['ignore'] -del language['French name of Language'] -del language['ISO 639-1 Code'] -language - - -# In[6]: - - -# renommer les colonnes -language = language.rename(columns={'ISO 639-2 Code' : 'iso_code', 'English name of Language' : 'name'}) - - -# In[7]: - - -language - - -# In[8]: - - -# corriger la valeur trop longue qaa-qtz -language.loc[language['iso_code'] == 'qaa-qtz', 'iso_code'] = 'qaa' - - -# In[9]: - - -# ajout de la valeur UNKNOWN -language = language.append({'id' : 999999, 'iso_code' : '___', 'name' : 'UNKNOWN'}, ignore_index=True) -language - - -# In[10]: - - -# esport JSON -result = language.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/language.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[11]: - - -# export csv -language.to_csv('language.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[12]: - - -# export csv -language.to_csv('sample/language.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[13]: - - -# export excel 
-language.to_excel('sample/language.xlsx', index=False) - diff --git a/import_scripts/03_oacct_journals.md b/import_scripts/03_oacct_journals.md deleted file mode 100644 index b47e42df..00000000 --- a/import_scripts/03_oacct_journals.md +++ /dev/null @@ -1,17070 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. - -Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -Date de dernière mise à jour : 16.07.2021 - -## Extraction des données des revues - - -## Corpus initial - -ISSNs des revues des publication archivées sur l'AoU UNIGE et sur Infoscience EPFL - -* Fichier des ISSNs de l'AoU exporté le 16.10.2020 -* Fichier des ISSNs de Infoscience exporté le 28.01.2021 -* Données extraits à partir du JSON de ISSN.org - - - -```python -import pandas as pd -import csv -import json -import numpy as np -import os -# paramètre pour le nombre de journaux dans le sample (0 pour prendre tout) -journals_sample_n = 1000 -``` - -## Table OA categories - -* 1 : UNKNOWN -* 2 : Green -* 3 : Hybrid -* 4 : Full -* 5 : Gold -* 6 : Diamond - - -```python -# creation du DF -col_names = ['id', - 'status', - 'description', - 'subscription', - 'accepted_manuscript', - 'apc', - 'final_version' - ] -oas = pd.DataFrame(columns = col_names) -oas -``` - - - - -
- - - - - - - - - - - - - - - - -
idstatusdescriptionsubscriptionaccepted_manuscriptapcfinal_version
-
- - - - -```python -# ajout des valeurs -oas = oas.append({'id' : 1, 'status' : 'UNKNOWN', 'description' : '', 'subscription' : 0, 'accepted_manuscript' : 0, 'apc' : 0, 'final_version' : 0}, ignore_index=True) -oas = oas.append({'id' : 2, 'status' : 'Green', 'description' : 'Paywalled access journal, usually allows the archive of submitted or accepted version on institutional repositories (embargo periods may apply)', 'subscription' : 1, 'accepted_manuscript' : 1, 'apc' : 0, 'final_version' : 0}, ignore_index=True) -oas = oas.append({'id' : 3, 'status' : 'hybrid', 'description' : 'Paywalled access journal, offers several Open Access upon payment of APCs. It allows offten the archive of published version on institutional repositories (embargo periods can apply)', 'subscription' : 1, 'accepted_manuscript' : 1, 'apc' : 1, 'final_version' : 1}, ignore_index=True) -# oas = oas.append({'id' : 4, 'status' : 'Full', 'description' : 'No subscription, Green or Gold', 'subscription' : 0, 'accepted_manuscript' : 1, 'apc' : 0, 'final_version' : 1}, ignore_index=True) -oas = oas.append({'id' : 5, 'status' : 'Gold', 'description' : 'Open Access journal (payment of APCs may apply). It allows offten the archive of published version on institutional repositories (embargo periods can apply)', 'subscription' : 0, 'accepted_manuscript' : 1, 'apc' : 1, 'final_version' : 1}, ignore_index=True) -oas = oas.append({'id' : 6, 'status' : 'Diamond', 'description' : 'Open Access journal (without payment of APCs). It allows offten the archive of published version on institutional repositories (embargo periods can apply)', 'subscription' : 0, 'accepted_manuscript' : 1, 'apc' : 0, 'final_version' : 1}, ignore_index=True) -``` - - -```python -oas -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idstatusdescriptionsubscriptionaccepted_manuscriptapcfinal_version
01UNKNOWN0000
12GreenPaywalled access journal, usually allows the a...1100
23hybridPaywalled access journal, offers several Open ...1111
35GoldOpen Access journal (payment of APCs may apply...0111
46DiamondOpen Access journal (without payment of APCs)....0101
-
- - - - -```python -# esport JSON -result = oas.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/oa.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export csv -oas.to_csv('sample/oa.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -oas.to_excel('sample/oa.xlsx', index=False) -``` - -## Table Journals - - -```python -issns = pd.read_csv('issn/issns_count.tsv', encoding='utf-8', header=0, sep='\t') -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issncount_unigecount_epflcount
01660-93791654.02.01656.0
10031-9007602.0678.01280.0
21932-6203608.0340.0948.0
32174-8454732.00.0732.0
41098-0121334.0393.0727.0
...............
135931471-01531.00.01.0
135942257-52941.00.01.0
135950950-92401.00.01.0
135961868-18831.00.01.0
135971063-68890.01.01.0
-

13598 rows × 4 columns

-
- - - - -```python -# ajout des colonnes -issns.insert(0, 'id', '', False) -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissncount_unigecount_epflcount
01660-93791654.02.01656.0
10031-9007602.0678.01280.0
21932-6203608.0340.0948.0
32174-8454732.00.0732.0
41098-0121334.0393.0727.0
..................
135931471-01531.00.01.0
135942257-52941.00.01.0
135950950-92401.00.01.0
135961868-18831.00.01.0
135971063-68890.01.01.0
-

13598 rows × 5 columns

-
- - - - -```python -# convertir l'index en id -issns = issns.reset_index() -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
indexidissncount_unigecount_epflcount
001660-93791654.02.01656.0
110031-9007602.0678.01280.0
221932-6203608.0340.0948.0
332174-8454732.00.0732.0
441098-0121334.0393.0727.0
.....................
13593135931471-01531.00.01.0
13594135942257-52941.00.01.0
13595135950950-92401.00.01.0
13596135961868-18831.00.01.0
13597135971063-68890.01.01.0
-

13598 rows × 6 columns

-
- - - - -```python -# ajout de l'id avec l'index + 1 -issns['id'] = issns['index'] + 1 -del issns['index'] -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissncount_unigecount_epflcount
011660-93791654.02.01656.0
120031-9007602.0678.01280.0
231932-6203608.0340.0948.0
342174-8454732.00.0732.0
451098-0121334.0393.0727.0
..................
13593135941471-01531.00.01.0
13594135952257-52941.00.01.0
13595135960950-92401.00.01.0
13596135971868-18831.00.01.0
13597135981063-68890.01.01.0
-

13598 rows × 5 columns

-
- - - - -```python -# reduction à X journaux pour l'échantillon de test -if journals_sample_n > 0 : - issns = issns.loc[:journals_sample_n] -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissncount_unigecount_epflcount
011660-93791654.02.01656.0
120031-9007602.0678.01280.0
231932-6203608.0340.0948.0
342174-8454732.00.0732.0
451098-0121334.0393.0727.0
..................
9969970964-17261.020.021.0
9979980022-346821.00.021.0
9989991432-206417.04.021.0
99910000960-14815.016.021.0
100010010161-756721.00.021.0
-

1001 rows × 5 columns

-
- - - - -```python -# ajout des ISSN-L -df_issnl = pd.read_csv('issn/20171102.ISSN-to-ISSN-L.txt', encoding='utf-8', header=0, sep='\t') -df_issnl -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ISSNISSN-L
00000-00190000-0019
10000-00270000-0027
20000-00430000-0043
30000-00510000-0051
40000-006X0000-006X
.........
19959138756-99578756-9957
19959148756-99658756-9965
19959158756-99738756-9973
19959168756-99818756-9981
19959178756-999X8756-999X
-

1995918 rows × 2 columns

-
- - - - -```python -# renommer les colonnes -df_issnl = df_issnl.rename(columns={'ISSN' : 'issn', 'ISSN-L' : 'issnl'}) -``` - - -```python -issns = pd.merge(issns, df_issnl, on='issn', how='left') -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissncount_unigecount_epflcountissnl
011660-93791654.02.01656.01660-9379
120031-9007602.0678.01280.00031-9007
231932-6203608.0340.0948.01932-6203
342174-8454732.00.0732.02174-8454
451098-0121334.0393.0727.01098-0121
.....................
9969970964-17261.020.021.00964-1726
9979980022-346821.00.021.00022-3468
9989991432-206417.04.021.00178-8051
99910000960-14815.016.021.00960-1481
100010010161-756721.00.021.00161-7567
-

1001 rows × 6 columns

-
- - - - -```python -# creation du DF -# 'oa_status' supprimé pour le moment -col_names = ['id', - 'issn', - 'issnl', - 'title', - 'starting_year', - 'end_year', - 'url', - 'name_short_iso_4' - ] -journals = pd.DataFrame(columns = col_names) -journals -``` - - - - -
- - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4
-
- - - - -```python -# creation du DF -col_names = ['id', 'iso_code'] -journals_languages = pd.DataFrame(columns = col_names) -journals_languages -``` - - - - -
- - - - - - - - - - - -
idiso_code
-
- - - - -```python -# creation du DF -# 'oa_status' supprimé -col_names = ['id', 'iso_code'] -journals_countries = pd.DataFrame(columns = col_names) -journals_countries -``` - - - - -
- - - - - - - - - - - -
idiso_code
-
- - - - -```python -# extraction des informations à partir des données ISSN.org -for index, row in issns.iterrows(): - myid = row['id'] - myissn = row['issn'] - if (((index/10) - int(index/10)) == 0) : - print(index) - # initialisation des variables à extraire - issnl = np.nan - title = '' - keytitle = '' - starting_year = np.nan - end_year = np.nan - myurl = np.nan - journal_country = np.nan - journal_language = np.nan - keytitle_abbr = np.nan - # export en json - if os.path.exists('issn/data/' + myissn + '.json'): - with open('issn/data/' + myissn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - for x in data['@graph']: - if ('@id' in x): - if (x['@id'] == 'resource/ISSN/' + myissn): - if ('mainTitle' in x): - title = x['mainTitle'] - else : - if ('name' in x): - title = x['name'] - # print(myissn) - if ('startDate' in x): - starting_year = x['startDate'] - if ('endDate' in x): - end_year = x['endDate'] - if ('url' in x): - urls = x['url'] - if type(urls) is list: - for url in urls: - # Filtrer les URLs des archives : - # www.ncbi.nlm.nih.gov/pmc/* - # www.pubmedcentral.gov/* - # pubmedcentral.nih.gov/* - # bibpurl.oclc.org/* - # www.jstor.org/* - # ieeexplore.ieee.org - # ovidsp.ovid.com - # et garder le premier des restants - myurl = url - if ('ncbi.nlm.nih.gov' not in url - and 'pubmedcentral' not in url - and 'bibpurl.oclc.org' not in url - and 'jstor.org' not in url - and 'ieeexplore.ieee.org' not in url - and 'ovidsp.ovid.com' not in url): - break - else : - myurl = x['url'] - if ('spatial' in x): - countries = x['spatial'] - if type(countries) is list: - for country in countries: - if ('https://www.iso.org/obp/ui/#iso:code:3166:' in country): - journal_country = country[-2:] - journals_countries = journals_countries.append({'id' : myid, 'iso_code' : journal_country}, ignore_index=True) - else : - if ('https://www.iso.org/obp/ui/#iso:code:3166:' in countries): - journal_country = countries[-2:] - journals_countries = 
journals_countries.append({'id' : myid, 'iso_code' : journal_country}, ignore_index=True) - # langue "inLanguage": "http://id.loc.gov/vocabulary/iso639-2/eng", - if ('inLanguage' in x): - languages = x['inLanguage'] - if type(languages) is list: - for language in languages: - journal_language = language[-3:] - journals_languages = journals_languages.append({'id' : myid, 'iso_code' : journal_language}, ignore_index=True) - else : - journal_language = languages[-3:] - journals_languages = journals_languages.append({'id' : myid, 'iso_code' : journal_language}, ignore_index=True) - if (x['@id'] == 'resource/ISSN/' + myissn + '#KeyTitle'): - if ('value' in x): - keytitle = x['value'] - if (x['@id'] == 'resource/ISSN/' + myissn + '#ISSN-L'): - if ('value' in x): - issnl = x['value'] - # "@id": "resource/ISSN/1098-0121#AbbreviatedKeyTitle", - if (x['@id'] == 'resource/ISSN/' + myissn + '#AbbreviatedKeyTitle'): - if ('value' in x): - mykeytitle_abbrs = x['value'] - if type(mykeytitle_abbrs) is list: - for mykeytitle_abbr in mykeytitle_abbrs: - print(myissn + ' - AbbreviatedKeyTitle is a list ' + mykeytitle_abbr) - keytitle_abbr = mykeytitle_abbr - with open('sample/03_journals_issn_multiple_titles.txt', 'a', encoding='utf-8') as g: - g.write(myissn + ' AbbreviatedKeyTitle is a list ' + mykeytitle_abbr + '\n') - break - else : - keytitle_abbr = mykeytitle_abbrs - if keytitle != '' : - title = keytitle - if title != '' : - # supprimer le point à la fin - if (title[-1] == '.'): - title = title[0:-1] - # remplacer les caractères spéciaux ˜The œ - if type(title) is list: - for mytitlei in title: - print(myissn + ' - title is a list ' + mytitlei) - title = str.replace(mytitlei, '˜The œ', 'The ') - with open('sample/03_journals_issn_multiple_titles.txt', 'a', encoding='utf-8') as g: - g.write(myissn + ' title is a list ' + mytitlei + '\n') - break - else : - title = str.replace(title, '˜The œ', 'The ') - else : - print(row['issn'] + ' - not found') - with 
open('sample/03_journals_issn_errors.txt', 'a', encoding='utf-8') as g: - g.write(row['issn'] + ' not found \n') - journals.at[index,'id'] = myid - journals.at[index,'title'] = title - journals.at[index,'issn'] = myissn - journals.at[index,'issnl'] = issnl - journals.at[index,'starting_year'] = starting_year - journals.at[index,'end_year'] = end_year - journals.at[index,'url'] = myurl - journals.at[index,'name_short_iso_4'] = keytitle_abbr -``` - - 0 - 10 - 1094-4087 - AbbreviatedKeyTitle is a list Opt Express - 20 - 30 - 40 - 50 - 60 - 70 - 80 - 90 - 100 - 110 - 120 - 130 - 140 - 150 - 160 - 170 - 0899-823X - AbbreviatedKeyTitle is a list Infect. control hosp. epidemiol. - 180 - 190 - 200 - 210 - 220 - 230 - 240 - 250 - 260 - 270 - 280 - 290 - 300 - 0370-693 - not found - 310 - 320 - 330 - 340 - 350 - 360 - 370 - 380 - 390 - 400 - 410 - 420 - 430 - 440 - 450 - 460 - 470 - 480 - 490 - 500 - 510 - 520 - 530 - 540 - 1544-9173 - AbbreviatedKeyTitle is a list PLoS Biol - 550 - 560 - 570 - 580 - 590 - 600 - 610 - 620 - 0277-86X - not found - 630 - 640 - 650 - 0003-951 - not found - 660 - 670 - 680 - 690 - 700 - 710 - 720 - 730 - 740 - 750 - 760 - 770 - 780 - 790 - 1931-3128 - AbbreviatedKeyTitle is a list Cell Host Microbe - 800 - 810 - 820 - 830 - 840 - 850 - 860 - 870 - 880 - 890 - 900 - 910 - 920 - 930 - 940 - 950 - 960 - 970 - 980 - 990 - 1000 - - - -```python -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN
342174-84542174-8454EU-topías20119999NaNEU-topías
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.
...........................
9969970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)
9979980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)
9989991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)
99910000960-14810960-1481Renewable energy19919999NaNRenew. energy
100010010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...
-

1001 rows × 8 columns

-
- - - - -```python -# titres vides -journals.loc[journals['title'] == ''] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4
3093100370-693NaNNaNNaNNaNNaN
3613620777-5466NaN||||||||NaNNaN
6296300277-86XNaNNaNNaNNaNNaN
6566570003-951NaNNaNNaNNaNNaN
8408411089-5647NaNNaNNaNNaNNaN
-
- - - - -```python -# export csv des titres vides -journals.loc[journals['title'] == ''].to_csv('sample/journals_sans_titre.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel des ids -journals.loc[journals['title'] == ''].to_excel('sample/journals_sans_titre.xlsx', index=False) -``` - - -```python -# garder les lignes avec titre -journals = journals.loc[journals['title'] != ''] -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN
342174-84542174-8454EU-topías20119999NaNEU-topías
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.
...........................
9969970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)
9979980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)
9989991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)
99910000960-14810960-1481Renewable energy19919999NaNRenew. energy
100010010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...
-

996 rows × 8 columns

-
- - - - -```python -journals.shape[0] -``` - - - - - 996 - - - -## Languages - - -```python -journals_languages -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idiso_code
01fre
12eng
23eng
34eng
44fre
.........
1117997eng
1118998eng
1119999eng
11201000eng
11211001eng
-

1122 rows × 2 columns

-
- - - - -```python -# ouvrir la table des langues -languages = pd.read_csv('sample/language.tsv', encoding='utf-8', header=0, sep='\t') -languages -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
iso_codenameid
0aarAfar1
1abkAbkhazian2
2aceAchinese3
3achAcoli4
4adaAdangme5
............
483zulZulu484
484zunZuni485
485zxxNo linguistic content; Not applicable486
486zzaZaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki487
487___UNKNOWN999999
-

488 rows × 3 columns

-
- - - - -```python -# renommer les colonnes -del languages['name'] -languages = languages.rename(columns={'id' : 'language'}) -``` - - -```python -# merge avec languages -journals_languages = pd.merge(journals_languages, languages, on='iso_code', how='left') -journals_languages -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idiso_codelanguage
01fre138
12eng124
23eng124
34eng124
44fre138
............
1117997eng124
1118998eng124
1119999eng124
11201000eng124
11211001eng124
-

1122 rows × 3 columns

-
- - - - -```python -# concat valeurs avec même id -journals_languages['language'] = journals_languages['language'].astype(str) -journals_languages = journals_languages.groupby('id').agg({'language': lambda x: ', '.join(x)}) -journals_languages -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
language
id
1138
2124
3124
4124, 138, 402, 292
5124
......
997124
998124
999124
1000124
1001124
-

996 rows × 1 columns

-
- - - - -```python -# recuperation de l'id des langues -journals = pd.merge(journals, journals_languages, on='id', how='left') -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4language
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124
..............................
9919970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124
9929980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124
9939991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)124
99410000960-14810960-1481Renewable energy19919999NaNRenew. energy124
99510010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124
-

996 rows × 9 columns

-
- - - -## Countries - - -```python -journals_countries -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idiso_code
01CH
12US
23US
34ES
45US
.........
992997GB
993998US
994999DE
9951000GB
9961001US
-

997 rows × 2 columns

-
- - - - -```python -# ouvrir la table des pays -country = pd.read_csv('sample/country.tsv', encoding='utf-8', header=0, sep='\t') -country -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nameiso_codeid
0AfghanistanAF1
1AlbaniaAL2
2AlgeriaDZ3
3American SamoaAS4
4AndorraAD5
............
246ZambiaZM247
247ZimbabweZW248
248Åland IslandsAX249
249International AgencyOI250
250UNKNOWN__999999
-

251 rows × 3 columns

-
- - - - -```python -# renommer les colonnes -del country['name'] -country = country.rename(columns={'id' : 'country'}) -``` - - -```python -# merge avec countries -journals_countries = pd.merge(journals_countries, country, on='iso_code', how='left') -journals_countries -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idiso_codecountry
01CH215
12US236
23US236
34ES209
45US236
............
992997GB234
993998US236
994999DE83
9951000GB234
9961001US236
-

997 rows × 3 columns

-
- - - - -```python -# concat valeurs avec même id -journals_countries['country'] = journals_countries['country'].astype(str) -journals_countries = journals_countries.groupby('id').agg({'country': lambda x: ', '.join(x)}) -journals_countries -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
country
id
1215
2236
3236
4209
5236
......
997234
998236
99983
1000234
1001236
-

997 rows × 1 columns

-
- - - - -```python -# recuperation de l'id des langues -journals = pd.merge(journals, journals_countries, on='id', how='left') -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountry
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236
.................................
9919970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234
9929980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236
9939991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483
99410000960-14810960-1481Renewable energy19919999NaNRenew. energy124234
99510010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236
-

996 rows × 10 columns

-
- - - -### DOAJ - - -```python -# ajout de DOAJ info -doaj = pd.read_csv('doaj/journalcsv__doaj_20210312_0636_utf8.csv', encoding='utf-8', header=0) -doaj -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal titleJournal URLURL in DOAJAlternative titleJournal ISSN (print version)Journal EISSN (online version)KeywordsLanguages in which the journal accepts manuscriptsPublisherCountry of publisher...URL for journal's Open Access statementContinuesContinued ByLCC CodesSubjectsDOAJ SealAdded on DateLast updated DateNumber of Article RecordsMost Recent Article Added
0Anais da Academia Brasileira de Ciênciashttp://www.scielo.br/scielo.php?script=sci_ser...https://doaj.org/toc/ed09859a464f4461b1af34279...Annals of the Brazilian Academy of Sciences0001-37651678-2690biological sciences, exact and earth sciences,...EnglishAcademia Brasileira de CiênciasBrazil...http://www.scielo.br/revistas/aabc/isubscrp.htmNaNNaNQScienceNo2004-04-23T21:31:00Z2017-01-04T14:19:54Z26492020-06-10T21:49:11Z
1ACMEhttp://riviste.unimi.it/index.php/ACMEhttps://doaj.org/toc/b1ca04ba56194f29a362b3eef...NaN0001-494X2282-0035italian literature, classic literature, lingui...ItalianUniversità degli Studi di MilanoItaly...http://riviste.unimi.it/index.php/ACME/about/e...NaNNaNAGeneral WorksNo2014-12-22T19:55:58Z2020-02-24T09:07:42Z1662020-06-19T09:42:34Z
2Acta Dermato-Venereologicahttp://www.medicaljournals.se/actahttps://doaj.org/toc/ffde9666ab1d46f1a8c688ce6...NaN0001-55551651-2057sexually transmitted infections, psoriasis, ps...EnglishSociety for Publication of Acta Dermato-Venere...Sweden...https://www.medicaljournals.se/acta/open-acces...NaNNaNRL1-803Medicine: DermatologyNo2011-11-10T12:31:05Z2017-02-22T11:14:48Z10962021-03-11T13:41:33Z
3Acta Médica Costarricensehttp://actamedica.medicos.cr/index.php/Acta_Me...https://doaj.org/toc/a5919aee5ad2413a89cf32df0...NaN0001-60122215-5856medicine, public health, medical sciences, healthEnglish, SpanishColegio de Médicos y Cirujanos de Costa RicaCosta Rica...http://actamedica.medicos.cr/index.php/Acta_Me...NaNNaNRMedicineNo2020-12-22T11:08:24Z2020-12-22T11:08:24Z12072015-12-08T15:06:43Z
4Acta Mycologicahttps://pbsociety.org.pl/journals/index.php/am...https://doaj.org/toc/0e8e2531ae3f455ebb49acb08...NaN0001-625X2353-074Xmycology, micromycetes, marcomycetes, slime mo...EnglishPolish Botanical SocietyPoland...https://pbsociety.org.pl/journals/index.php/am...NaNNaNQH301-705.5Science: Biology (General)No2014-05-29T20:02:32Z2021-01-16T17:41:32Z11542021-03-05T18:55:46Z
..................................................................
16024BME Frontiershttps://spj.sciencemag.org/bmefhttps://doaj.org/toc/f9fa881c1be5443a86ed71c2e...Biomedical Engineering FrontiersNaN2765-8031biomedical imaging, biomedical devices, biomat...EnglishAmerican Association for the Advancement of Sc...United States...https://spj.sciencemag.org/bmef/about/NaNNaNR855-855.5|TP248.13-248.65Medicine: Medicine (General): Medical technolo...No2021-01-22T11:54:20Z2021-01-22T11:54:20Z112021-03-08T09:06:36Z
16025Harvard Kennedy School Misinformation Reviewhttps://misinforeview.hks.harvard.eduhttps://doaj.org/toc/d71096ec7090499681cc0ccf8...HKS Misinformation ReviewNaN2766-1652misinformation, disinformation, fake newsEnglishHarvard Kennedy SchoolUnited States...https://misinforeview.hks.harvard.edu/editoria...NaNNaNT58.5-58.64|P87-96Technology: Technology (General): Industrial e...No2021-02-12T10:29:21Z2021-02-12T10:29:21Z0NaN
16026One Health & Risk Managementhttps://journal.ohrm.bba.md/index.php/journal-...https://doaj.org/toc/68671b966cd24a0ebaa44d78f...OH&RM2887-34582587-3466one health, risc management, public health, hu...English, Romanian, French, RussianAsociatia de Biosiguranta si BiosecuritateMoldova, Republic of...https://journal.ohrm.bba.md/index.php/journal-...NaNNaNR|QMedicine | ScienceNo2021-03-04T16:06:58Z2021-03-04T16:06:58Z42021-03-04T20:46:57Z
16027فصلنامه پژوهش‌های مدیریت منابع انسانیhttps://hrmj.ihu.ac.ir/?lang=enhttps://doaj.org/toc/87d44ffb6ff849b18d5ddce9c...Journal of Research in Human Resources Management8254-80022645-5072human resources managementPersianImam Hussein UniversityIran, Islamic Republic of...https://hrmj.ihu.ac.ir/?lang=enNaNNaNHF5549-5549.5Social Sciences: Commerce: Business: Personnel...No2021-01-20T11:27:05Z2021-01-20T11:27:05Z0NaN
16028Science of Tsunami Hazardshttp://tsunamisociety.org/https://doaj.org/toc/a4f06be11f4f4db489dc034c7...NaN8755-6839NaNtsunamis, tsunami warning systems, earthquakes...EnglishTsunami Society InternationalUnited States...http://tsunamisociety.org/AboutUs.htmlNaNNaNGC1-1581Geography. Anthropology. Recreation: OceanographyNo2009-04-16T17:40:30Z2016-07-21T16:09:38Z2392021-02-27T01:00:51Z
-

16029 rows × 53 columns

-
- - - - -```python -# ajout ISSNL -doaj['issn'] = doaj['Journal ISSN (print version)'] -doaj.loc[doaj['issn'].isna(), 'issn'] = doaj['Journal EISSN (online version)'] -doaj -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal titleJournal URLURL in DOAJAlternative titleJournal ISSN (print version)Journal EISSN (online version)KeywordsLanguages in which the journal accepts manuscriptsPublisherCountry of publisher...ContinuesContinued ByLCC CodesSubjectsDOAJ SealAdded on DateLast updated DateNumber of Article RecordsMost Recent Article Addedissn
0Anais da Academia Brasileira de Ciênciashttp://www.scielo.br/scielo.php?script=sci_ser...https://doaj.org/toc/ed09859a464f4461b1af34279...Annals of the Brazilian Academy of Sciences0001-37651678-2690biological sciences, exact and earth sciences,...EnglishAcademia Brasileira de CiênciasBrazil...NaNNaNQScienceNo2004-04-23T21:31:00Z2017-01-04T14:19:54Z26492020-06-10T21:49:11Z0001-3765
1ACMEhttp://riviste.unimi.it/index.php/ACMEhttps://doaj.org/toc/b1ca04ba56194f29a362b3eef...NaN0001-494X2282-0035italian literature, classic literature, lingui...ItalianUniversità degli Studi di MilanoItaly...NaNNaNAGeneral WorksNo2014-12-22T19:55:58Z2020-02-24T09:07:42Z1662020-06-19T09:42:34Z0001-494X
2Acta Dermato-Venereologicahttp://www.medicaljournals.se/actahttps://doaj.org/toc/ffde9666ab1d46f1a8c688ce6...NaN0001-55551651-2057sexually transmitted infections, psoriasis, ps...EnglishSociety for Publication of Acta Dermato-Venere...Sweden...NaNNaNRL1-803Medicine: DermatologyNo2011-11-10T12:31:05Z2017-02-22T11:14:48Z10962021-03-11T13:41:33Z0001-5555
3Acta Médica Costarricensehttp://actamedica.medicos.cr/index.php/Acta_Me...https://doaj.org/toc/a5919aee5ad2413a89cf32df0...NaN0001-60122215-5856medicine, public health, medical sciences, healthEnglish, SpanishColegio de Médicos y Cirujanos de Costa RicaCosta Rica...NaNNaNRMedicineNo2020-12-22T11:08:24Z2020-12-22T11:08:24Z12072015-12-08T15:06:43Z0001-6012
4Acta Mycologicahttps://pbsociety.org.pl/journals/index.php/am...https://doaj.org/toc/0e8e2531ae3f455ebb49acb08...NaN0001-625X2353-074Xmycology, micromycetes, marcomycetes, slime mo...EnglishPolish Botanical SocietyPoland...NaNNaNQH301-705.5Science: Biology (General)No2014-05-29T20:02:32Z2021-01-16T17:41:32Z11542021-03-05T18:55:46Z0001-625X
..................................................................
16024BME Frontiershttps://spj.sciencemag.org/bmefhttps://doaj.org/toc/f9fa881c1be5443a86ed71c2e...Biomedical Engineering FrontiersNaN2765-8031biomedical imaging, biomedical devices, biomat...EnglishAmerican Association for the Advancement of Sc...United States...NaNNaNR855-855.5|TP248.13-248.65Medicine: Medicine (General): Medical technolo...No2021-01-22T11:54:20Z2021-01-22T11:54:20Z112021-03-08T09:06:36Z2765-8031
16025Harvard Kennedy School Misinformation Reviewhttps://misinforeview.hks.harvard.eduhttps://doaj.org/toc/d71096ec7090499681cc0ccf8...HKS Misinformation ReviewNaN2766-1652misinformation, disinformation, fake newsEnglishHarvard Kennedy SchoolUnited States...NaNNaNT58.5-58.64|P87-96Technology: Technology (General): Industrial e...No2021-02-12T10:29:21Z2021-02-12T10:29:21Z0NaN2766-1652
16026One Health & Risk Managementhttps://journal.ohrm.bba.md/index.php/journal-...https://doaj.org/toc/68671b966cd24a0ebaa44d78f...OH&RM2887-34582587-3466one health, risc management, public health, hu...English, Romanian, French, RussianAsociatia de Biosiguranta si BiosecuritateMoldova, Republic of...NaNNaNR|QMedicine | ScienceNo2021-03-04T16:06:58Z2021-03-04T16:06:58Z42021-03-04T20:46:57Z2887-3458
16027فصلنامه پژوهش‌های مدیریت منابع انسانیhttps://hrmj.ihu.ac.ir/?lang=enhttps://doaj.org/toc/87d44ffb6ff849b18d5ddce9c...Journal of Research in Human Resources Management8254-80022645-5072human resources managementPersianImam Hussein UniversityIran, Islamic Republic of...NaNNaNHF5549-5549.5Social Sciences: Commerce: Business: Personnel...No2021-01-20T11:27:05Z2021-01-20T11:27:05Z0NaN8254-8002
16028Science of Tsunami Hazardshttp://tsunamisociety.org/https://doaj.org/toc/a4f06be11f4f4db489dc034c7...NaN8755-6839NaNtsunamis, tsunami warning systems, earthquakes...EnglishTsunami Society InternationalUnited States...NaNNaNGC1-1581Geography. Anthropology. Recreation: OceanographyNo2009-04-16T17:40:30Z2016-07-21T16:09:38Z2392021-02-27T01:00:51Z8755-6839
-

16029 rows × 54 columns

-
- - - - -```python -doaj = pd.merge(doaj, df_issnl, on='issn', how='left') -doaj -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal titleJournal URLURL in DOAJAlternative titleJournal ISSN (print version)Journal EISSN (online version)KeywordsLanguages in which the journal accepts manuscriptsPublisherCountry of publisher...Continued ByLCC CodesSubjectsDOAJ SealAdded on DateLast updated DateNumber of Article RecordsMost Recent Article Addedissnissnl
0Anais da Academia Brasileira de Ciênciashttp://www.scielo.br/scielo.php?script=sci_ser...https://doaj.org/toc/ed09859a464f4461b1af34279...Annals of the Brazilian Academy of Sciences0001-37651678-2690biological sciences, exact and earth sciences,...EnglishAcademia Brasileira de CiênciasBrazil...NaNQScienceNo2004-04-23T21:31:00Z2017-01-04T14:19:54Z26492020-06-10T21:49:11Z0001-37650001-3765
1ACMEhttp://riviste.unimi.it/index.php/ACMEhttps://doaj.org/toc/b1ca04ba56194f29a362b3eef...NaN0001-494X2282-0035italian literature, classic literature, lingui...ItalianUniversità degli Studi di MilanoItaly...NaNAGeneral WorksNo2014-12-22T19:55:58Z2020-02-24T09:07:42Z1662020-06-19T09:42:34Z0001-494X0001-494X
2Acta Dermato-Venereologicahttp://www.medicaljournals.se/actahttps://doaj.org/toc/ffde9666ab1d46f1a8c688ce6...NaN0001-55551651-2057sexually transmitted infections, psoriasis, ps...EnglishSociety for Publication of Acta Dermato-Venere...Sweden...NaNRL1-803Medicine: DermatologyNo2011-11-10T12:31:05Z2017-02-22T11:14:48Z10962021-03-11T13:41:33Z0001-55550001-5555
3Acta Médica Costarricensehttp://actamedica.medicos.cr/index.php/Acta_Me...https://doaj.org/toc/a5919aee5ad2413a89cf32df0...NaN0001-60122215-5856medicine, public health, medical sciences, healthEnglish, SpanishColegio de Médicos y Cirujanos de Costa RicaCosta Rica...NaNRMedicineNo2020-12-22T11:08:24Z2020-12-22T11:08:24Z12072015-12-08T15:06:43Z0001-60120001-6012
4Acta Mycologicahttps://pbsociety.org.pl/journals/index.php/am...https://doaj.org/toc/0e8e2531ae3f455ebb49acb08...NaN0001-625X2353-074Xmycology, micromycetes, marcomycetes, slime mo...EnglishPolish Botanical SocietyPoland...NaNQH301-705.5Science: Biology (General)No2014-05-29T20:02:32Z2021-01-16T17:41:32Z11542021-03-05T18:55:46Z0001-625X0001-625X
..................................................................
16024BME Frontiershttps://spj.sciencemag.org/bmefhttps://doaj.org/toc/f9fa881c1be5443a86ed71c2e...Biomedical Engineering FrontiersNaN2765-8031biomedical imaging, biomedical devices, biomat...EnglishAmerican Association for the Advancement of Sc...United States...NaNR855-855.5|TP248.13-248.65Medicine: Medicine (General): Medical technolo...No2021-01-22T11:54:20Z2021-01-22T11:54:20Z112021-03-08T09:06:36Z2765-8031NaN
16025Harvard Kennedy School Misinformation Reviewhttps://misinforeview.hks.harvard.eduhttps://doaj.org/toc/d71096ec7090499681cc0ccf8...HKS Misinformation ReviewNaN2766-1652misinformation, disinformation, fake newsEnglishHarvard Kennedy SchoolUnited States...NaNT58.5-58.64|P87-96Technology: Technology (General): Industrial e...No2021-02-12T10:29:21Z2021-02-12T10:29:21Z0NaN2766-1652NaN
16026One Health & Risk Managementhttps://journal.ohrm.bba.md/index.php/journal-...https://doaj.org/toc/68671b966cd24a0ebaa44d78f...OH&RM2887-34582587-3466one health, risc management, public health, hu...English, Romanian, French, RussianAsociatia de Biosiguranta si BiosecuritateMoldova, Republic of...NaNR|QMedicine | ScienceNo2021-03-04T16:06:58Z2021-03-04T16:06:58Z42021-03-04T20:46:57Z2887-3458NaN
16027فصلنامه پژوهش‌های مدیریت منابع انسانیhttps://hrmj.ihu.ac.ir/?lang=enhttps://doaj.org/toc/87d44ffb6ff849b18d5ddce9c...Journal of Research in Human Resources Management8254-80022645-5072human resources managementPersianImam Hussein UniversityIran, Islamic Republic of...NaNHF5549-5549.5Social Sciences: Commerce: Business: Personnel...No2021-01-20T11:27:05Z2021-01-20T11:27:05Z0NaN8254-8002NaN
16028Science of Tsunami Hazardshttp://tsunamisociety.org/https://doaj.org/toc/a4f06be11f4f4db489dc034c7...NaN8755-6839NaNtsunamis, tsunami warning systems, earthquakes...EnglishTsunami Society InternationalUnited States...NaNGC1-1581Geography. Anthropology. Recreation: OceanographyNo2009-04-16T17:40:30Z2016-07-21T16:09:38Z2392021-02-27T01:00:51Z8755-68398755-6839
-

16029 rows × 55 columns

-
- - - - -```python -doaj.columns -``` - - - - - Index(['Journal title', 'Journal URL', 'URL in DOAJ', 'Alternative title', - 'Journal ISSN (print version)', 'Journal EISSN (online version)', - 'Keywords', 'Languages in which the journal accepts manuscripts', - 'Publisher', 'Country of publisher', 'Society or institution', - 'Country of society or institution', 'Journal license', - 'License attributes', 'URL for license terms', - 'Machine-readable CC licensing information embedded or displayed in articles', - 'URL to an example page with embedded licensing information', - 'Author holds copyright without restrictions', - 'Copyright information URL', 'Review process', - 'Review process information URL', 'Journal plagiarism screening policy', - 'Plagiarism information URL', 'URL for journal's aims & scope', - 'URL for the Editorial Board page', - 'URL for journal's instructions for authors', - 'Average number of weeks between article submission and publication', - 'APC', 'APC information URL', 'APC amount', - 'Journal waiver policy (for developing country authors etc)', - 'Waiver policy information URL', 'Has other fees', - 'Other submission fees information URL', 'Preservation Services', - 'Preservation Service: national library', - 'Preservation information URL', 'Deposit policy directory', - 'URL for deposit policy', 'Persistent article identifiers', - 'Article metadata includes ORCIDs', - 'Journal complies with I4OC standards for open citations', - 'Does this journal allow unrestricted reuse in compliance with BOAI?', - 'URL for journal's Open Access statement', 'Continues', 'Continued By', - 'LCC Codes', 'Subjects', 'DOAJ Seal', 'Added on Date', - 'Last updated Date', 'Number of Article Records', - 'Most Recent Article Added', 'issn', 'issnl'], - dtype='object') - - - - -```python -doaj['Preservation Services'] -``` - - - - - 0 NaN - 1 NaN - 2 NaN - 3 PKP PN - 4 NaN - ... 
- 16024 NaN - 16025 NaN - 16026 NaN - 16027 NaN - 16028 NaN - Name: Preservation Services, Length: 16029, dtype: object - - - - -```python -doaj['DOAJ Seal'] -``` - - - - - 0 No - 1 No - 2 No - 3 No - 4 No - .. - 16024 No - 16025 No - 16026 No - 16027 No - 16028 No - Name: DOAJ Seal, Length: 16029, dtype: object - - - - -```python -doaj['issnl'] -``` - - - - - 0 0001-3765 - 1 0001-494X - 2 0001-5555 - 3 0001-6012 - 4 0001-625X - ... - 16024 NaN - 16025 NaN - 16026 NaN - 16027 NaN - 16028 8755-6839 - Name: issnl, Length: 16029, dtype: object - - - - -```python -doaj['APC'].value_counts() -``` - - - - - No 11567 - Yes 4462 - Name: APC, dtype: int64 - - - - -```python -# ajout des infos de DOAJ : -# Journal title -# DOAJ Seal -doaj_for_merge = doaj[['issnl', 'Journal title', 'DOAJ Seal', 'APC']] -doaj_for_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnlJournal titleDOAJ SealAPC
00001-3765Anais da Academia Brasileira de CiênciasNoNo
10001-494XACMENoNo
20001-5555Acta Dermato-VenereologicaNoYes
30001-6012Acta Médica CostarricenseNoNo
40001-625XActa MycologicaNoYes
...............
16024NaNBME FrontiersNoNo
16025NaNHarvard Kennedy School Misinformation ReviewNoNo
16026NaNOne Health & Risk ManagementNoNo
16027NaNفصلنامه پژوهش‌های مدیریت منابع انسانیNoNo
160288755-6839Science of Tsunami HazardsNoNo
-

16029 rows × 4 columns

-
- - - - -```python -# renommer les colonnes -doaj_for_merge = doaj_for_merge.rename(columns={'Journal title' : 'doaj_title', 'DOAJ Seal' : 'doaj_seal'}) -doaj_for_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnldoaj_titledoaj_sealAPC
00001-3765Anais da Academia Brasileira de CiênciasNoNo
10001-494XACMENoNo
20001-5555Acta Dermato-VenereologicaNoYes
30001-6012Acta Médica CostarricenseNoNo
40001-625XActa MycologicaNoYes
...............
16024NaNBME FrontiersNoNo
16025NaNHarvard Kennedy School Misinformation ReviewNoNo
16026NaNOne Health & Risk ManagementNoNo
16027NaNفصلنامه پژوهش‌های مدیریت منابع انسانیNoNo
160288755-6839Science of Tsunami HazardsNoNo
-

16029 rows × 4 columns

-
- - - - -```python -# merge avec journals -journals = pd.merge(journals, doaj_for_merge, on='issnl', how='left') -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPC
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONEYesYes
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN
..........................................
9919970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234NaNNaNNaN
9929980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN
9939991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN
99410000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN
99510010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN
-

996 rows × 13 columns

-
- - - - -```python -# ajouter info sur la presence sur DOAJ ou du seal -journals.loc[journals['doaj_title'].isna(), 'doaj_status'] = 0 -journals.loc[~journals['doaj_title'].isna(), 'doaj_status'] = 1 -journals.loc[journals['doaj_seal'] == 'Yes', 'doaj_seal'] = 1 -journals.loc[journals['doaj_seal'] == 'No', 'doaj_seal'] = 0 -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_status
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1Yes1.0
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN0.0
.............................................
9919970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234NaNNaNNaN0.0
9929980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0
9939991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0
99410000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0
99510010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0
-

996 rows × 14 columns

-
- - - -### LOCKSS - - -```python -# ajout des infos de preservation LOCKSS, Portico et Licences Nationales -lockss = pd.read_csv('lockss/keepers-LOCKSS-report.csv', encoding='utf-8', header=0, skiprows=1) -lockss -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PublisherTitleISSNeISSNPreserved VolumesPreserved YearsIn Progress VolumesIn Progress Years
0ARKAT USAARKIVOC1551-70041551-70122000; 2001; 2002; 2003; 2004; 2004; 2004; 20052000; 2001; 2002; 2003; 2004; 2004; 2004; 2005NaNNaN
1Ab ImperioAb Imperio2166-40722164-97312005; 2006; 2007; 2008; 2009; 2010; 2011; 2012...2000; 2001; 2002; 2003; 2004; 2005; 2005; 2006...NaN2020
2Absinthe Literary ReviewAbsinthe Literary ReviewNaN1939-0343NaN2003; 2004; 2005NaNNaN
3Academy HealtheGEMsNaN2327-92141; 2; 2; 3; 42013; 2014; 2014; 2015; 2016NaNNaN
4Academy of American Franciscan HistoryThe Americas0003-16151533-624757; 58; 59; 60; 61; 62; 63; 64; 65; 66; 67; 68...2000; 2001; 2002; 2003; 2004; 2005; 2006; 2007...NaNNaN
...........................
14988Youngstown State University Center for Judaic ...Journal of Jewish Identities1946-25221939-79411; 2; 3; 4; 5; 6; 7; 82008; 2009; 2010; 2011; 2012; 2013; 2014; 2015NaNNaN
14989Zoological Society of JapanZoological Science0289-0003NaN12; 13; 14; 15; 16; 17; 18; 19; 20; 21; 22; 23...1995; 1996; 1997; 1998; 1999; 2000; 2001; 2002...NaNNaN
14990Zoological Society of Southern AfricaAfrican Zoology1562-70202224-073X41; 42; 43; 44; 45; 46; 47; 48; 49; 50; 51; 522006; 2007; 2008; 2009; 2010; 2011; 2012; 2013...NaNNaN
14991eLife Sciences PublicationseLifeNaN2050-084XNaN2014; 2014; 2014; 2014; 2014; 2014; 2014; 2014...NaNNaN
14992frommann-holzboogSteiner StudiesNaN2698-217XNaNNaN12020
-

14993 rows × 8 columns

-
- - - - -```python -# ajout ISSNL -lockss['issn'] = lockss['eISSN'] -lockss.loc[lockss['eISSN'].isna(), 'issn'] = lockss['ISSN'] -lockss -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PublisherTitleISSNeISSNPreserved VolumesPreserved YearsIn Progress VolumesIn Progress Yearsissn
0ARKAT USAARKIVOC1551-70041551-70122000; 2001; 2002; 2003; 2004; 2004; 2004; 20052000; 2001; 2002; 2003; 2004; 2004; 2004; 2005NaNNaN1551-7012
1Ab ImperioAb Imperio2166-40722164-97312005; 2006; 2007; 2008; 2009; 2010; 2011; 2012...2000; 2001; 2002; 2003; 2004; 2005; 2005; 2006...NaN20202164-9731
2Absinthe Literary ReviewAbsinthe Literary ReviewNaN1939-0343NaN2003; 2004; 2005NaNNaN1939-0343
3Academy HealtheGEMsNaN2327-92141; 2; 2; 3; 42013; 2014; 2014; 2015; 2016NaNNaN2327-9214
4Academy of American Franciscan HistoryThe Americas0003-16151533-624757; 58; 59; 60; 61; 62; 63; 64; 65; 66; 67; 68...2000; 2001; 2002; 2003; 2004; 2005; 2006; 2007...NaNNaN1533-6247
..............................
14988Youngstown State University Center for Judaic ...Journal of Jewish Identities1946-25221939-79411; 2; 3; 4; 5; 6; 7; 82008; 2009; 2010; 2011; 2012; 2013; 2014; 2015NaNNaN1939-7941
14989Zoological Society of JapanZoological Science0289-0003NaN12; 13; 14; 15; 16; 17; 18; 19; 20; 21; 22; 23...1995; 1996; 1997; 1998; 1999; 2000; 2001; 2002...NaNNaN0289-0003
14990Zoological Society of Southern AfricaAfrican Zoology1562-70202224-073X41; 42; 43; 44; 45; 46; 47; 48; 49; 50; 51; 522006; 2007; 2008; 2009; 2010; 2011; 2012; 2013...NaNNaN2224-073X
14991eLife Sciences PublicationseLifeNaN2050-084XNaN2014; 2014; 2014; 2014; 2014; 2014; 2014; 2014...NaNNaN2050-084X
14992frommann-holzboogSteiner StudiesNaN2698-217XNaNNaN120202698-217X
-

14993 rows × 9 columns

-
- - - - -```python -lockss = pd.merge(lockss, df_issnl, on='issn', how='left') -lockss -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PublisherTitleISSNeISSNPreserved VolumesPreserved YearsIn Progress VolumesIn Progress Yearsissnissnl
0ARKAT USAARKIVOC1551-70041551-70122000; 2001; 2002; 2003; 2004; 2004; 2004; 20052000; 2001; 2002; 2003; 2004; 2004; 2004; 2005NaNNaN1551-70121551-7004
1Ab ImperioAb Imperio2166-40722164-97312005; 2006; 2007; 2008; 2009; 2010; 2011; 2012...2000; 2001; 2002; 2003; 2004; 2005; 2005; 2006...NaN20202164-97312166-4072
2Absinthe Literary ReviewAbsinthe Literary ReviewNaN1939-0343NaN2003; 2004; 2005NaNNaN1939-03431939-0343
3Academy HealtheGEMsNaN2327-92141; 2; 2; 3; 42013; 2014; 2014; 2015; 2016NaNNaN2327-92142327-9214
4Academy of American Franciscan HistoryThe Americas0003-16151533-624757; 58; 59; 60; 61; 62; 63; 64; 65; 66; 67; 68...2000; 2001; 2002; 2003; 2004; 2005; 2006; 2007...NaNNaN1533-62470003-1615
.................................
14988Youngstown State University Center for Judaic ...Journal of Jewish Identities1946-25221939-79411; 2; 3; 4; 5; 6; 7; 82008; 2009; 2010; 2011; 2012; 2013; 2014; 2015NaNNaN1939-79411939-7941
14989Zoological Society of JapanZoological Science0289-0003NaN12; 13; 14; 15; 16; 17; 18; 19; 20; 21; 22; 23...1995; 1996; 1997; 1998; 1999; 2000; 2001; 2002...NaNNaN0289-00030289-0003
14990Zoological Society of Southern AfricaAfrican Zoology1562-70202224-073X41; 42; 43; 44; 45; 46; 47; 48; 49; 50; 51; 522006; 2007; 2008; 2009; 2010; 2011; 2012; 2013...NaNNaN2224-073X1562-7020
14991eLife Sciences PublicationseLifeNaN2050-084XNaN2014; 2014; 2014; 2014; 2014; 2014; 2014; 2014...NaNNaN2050-084X2050-084X
14992frommann-holzboogSteiner StudiesNaN2698-217XNaNNaN120202698-217XNaN
-

14993 rows × 10 columns

-
- - - - -```python -lockss.columns -``` - - - - - Index(['Publisher', 'Title', 'ISSN', 'eISSN', 'Preserved Volumes', - 'Preserved Years', 'In Progress Volumes', 'In Progress Years', 'issn', - 'issnl'], - dtype='object') - - - - -```python -# test des lignes sans merge -lockss.loc[lockss['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PublisherTitleISSNeISSNPreserved VolumesPreserved YearsIn Progress VolumesIn Progress Yearsissnissnl
5Academy of ManagementAcademy of Management Discoveries (AMD)NaN2168-10071; 2; 32015; 2016; 2017NaNNaN2168-1007NaN
28Alliance of Crop, Soil, and Environmental Scie...Soil HorizonsNaN2163-281250; 51; 52; 53; 54; 55; 562009; 2010; 2011; 2012; 2013; 2014; 2015NaNNaN2163-2812NaN
131American Institute of Aeronautics and Astronau...Air Traffic Control Quarterly1064-38182472-57571; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15...1993; 1995; 1996; 1997; 1998; 1999; 2000; 2001...219942472-5757NaN
134American Institute of Aeronautics and Astronau...Journal of Air TransportationNaN2380-945024; 25; 26; 272016; 2017; 2018; 20192820202380-9450NaN
192American Psychiatric Association PublishingPsychiatric Research and Clinical PracticeNaN2575-560912019220202575-5609NaN
.................................
14900Utrecht University LibraryEarly Modern Low CountriesNaN2543-1587NaNNaN1; 2; 3; 42017; 2018; 2019; 20202543-1587NaN
14968White Rose University PressBritish and Irish Orthoptic JournalNaN2516-35906; 7; 8; 9; 10; 11; 12; 13; 14; 162009; 2010; 2011; 2012; 2013; 2014; 2015; 2016...1720202516-3590NaN
14970White Rose University PressUndergraduate Journal of Politics and Internat...NaN2398-59921; 22018; 2019NaNNaN2398-5992NaN
14985World Haiku ClubWorld Haiku ReviewNaNNaN1; 2; 32001; 2002; 2003NaNNaNNaNNaN
14992frommann-holzboogSteiner StudiesNaN2698-217XNaNNaN120202698-217XNaN
-

835 rows × 10 columns

-
- - - - -```python -# utiliser l'ISSN à la place sur ces lignes -lockss.loc[lockss['issnl'].isna(), 'issnl'] = lockss['issn'] -``` - - -```python -# test des lignes sans merge -lockss.loc[lockss['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PublisherTitleISSNeISSNPreserved VolumesPreserved YearsIn Progress VolumesIn Progress Yearsissnissnl
317Association des Amis des CryptogamesCryptogamie, AlgologieNaNNaN32; 33; 34; 35; 36; 37; 382011; 2012; 2013; 2014; 2015; 2016; 2017NaNNaNNaNNaN
318Association des Amis des CryptogamesCryptogamie, BryologieNaNNaN32; 33; 34; 35; 36; 37; 382011; 2012; 2013; 2014; 2015; 2016; 2017NaNNaNNaNNaN
319Association des Amis des CryptogamesCryptogamie, MycologieNaNNaN32; 33; 34; 35; 36; 37; 382011; 2012; 2013; 2014; 2015; 2016; 2017NaNNaNNaNNaN
850Boston College LibrariesFresh Ink: Essays From Boston College's First-...NaNNaN12; 13; 13; 92009; 2010; 2011; 2007NaNNaNNaNNaN
1681Exquisite CorpseExquisite CorpseNaNNaNNaN1999NaNNaNNaNNaN
2032Georgia Southern UniversityIrish Studies SouthNaNNaN12014NaNNaNNaNNaN
2039Georgia Southern UniversityThe Journal of Student Success in WritingNaNNaN12017NaNNaNNaNNaN
3526LOCKSS ProgramLOCKSS CardNaNNaNNaN2005; 2006; 2006; 2006NaNNaNNaNNaN
4721Oxford University PressInternational Immunology Meeting AbstractsNaNNaNNaNNaNNaNNaNNaNNaN
6725Sagamore PublishingJournal of Facility Planning, Design, and Mana...NaNNaN1; 2; 3; 42013; 2014; 2015; 2016NaNNaNNaNNaN
10718State of AlaskaAlaska State DocumentsNaNNaNNaN2005; 2005; 2006; 2006; 2007; 2007; 2008; 2008...NaNNaNNaNNaN
14985World Haiku ClubWorld Haiku ReviewNaNNaN1; 2; 32001; 2002; 2003NaNNaNNaNNaN
-
- - - - -```python -# ajout des infos de LOCKSS : -# Title -lockss_for_merge = lockss[['issnl', 'Title']] -lockss_for_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnlTitle
01551-7004ARKIVOC
12166-4072Ab Imperio
21939-0343Absinthe Literary Review
32327-9214eGEMs
40003-1615The Americas
.........
149881939-7941Journal of Jewish Identities
149890289-0003Zoological Science
149901562-7020African Zoology
149912050-084XeLife
149922698-217XSteiner Studies
-

14993 rows × 2 columns

-
- - - - -```python -# renommer les colonnes -lockss_for_merge = lockss_for_merge.rename(columns={'Title' : 'lockss_title'}) -lockss_for_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnllockss_title
01551-7004ARKIVOC
12166-4072Ab Imperio
21939-0343Absinthe Literary Review
32327-9214eGEMs
40003-1615The Americas
.........
149881939-7941Journal of Jewish Identities
149890289-0003Zoological Science
149901562-7020African Zoology
149912050-084XeLife
149922698-217XSteiner Studies
-

14993 rows × 2 columns

-
- - - - -```python -# merge avec journals -journals = pd.merge(journals, lockss_for_merge, on='issnl', how='left') -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_title
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1Yes1.0PLoS One
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN0.0NaN
................................................
10009970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234NaNNaNNaN0.0NaN
10019980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN
10029991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields
100310000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN
100410010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN
-

1005 rows × 15 columns

-
- - - - -```python -# suppression des doublons -journals = journals.drop_duplicates(subset=['id']) -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_title
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1Yes1.0PLoS One
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN0.0NaN
................................................
10009970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234NaNNaNNaN0.0NaN
10019980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN
10029991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields
100310000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN
100410010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN
-

996 rows × 15 columns

-
- - - - -```python -# ajouter info sur la presence sur LOCKSS -journals.loc[journals['lockss_title'].isna(), 'lockss'] = 0 -journals.loc[~journals['lockss_title'].isna(), 'lockss'] = 1 -journals -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexing.py:376: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - self.obj[key] = _infer_fill_value(value) - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\indexing.py:494: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - self.obj[item] = s - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_titlelockss
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN0.0
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1Yes1.0PLoS One1.0
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN0.0
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN0.0NaN0.0
...................................................
10009970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234NaNNaNNaN0.0NaN0.0
10019980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN0.0
10029991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0
100310000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN0.0
100410010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN0.0
-

996 rows × 16 columns

-
- - - -### Portico - - -```python -# ajout des infos de preservation Portico -portico = pd.read_excel('portico/e-journals.xlsx', sheet_name='Details', skiprows=2) -portico -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PublisherTitleSocietyPrint ISSNe-ISSNPCAStatusYearsContentSet IdHoldings...Unnamed: 13Unnamed: 14Unnamed: 15Unnamed: 16Unnamed: 17Unnamed: 18Unnamed: 19Unnamed: 20Unnamed: 21Unnamed: 22
0ACI Information Group (through 2018)ACI Information GroupNaNNaN2374-1406Nopreserved2017-2018ACI Scholarly Blog Content2017 - v. 2017 (January-December), 2018 - v. 2......NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1AECL Nuclear ReviewCNL Nuclear ReviewNaN2369-69312369-6923Yespreserved2016-2020ISSN_236969312016 - v. 5 (1-2), 2016/2017 - v. 6 (1-2), 201......NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2AECL Nuclear ReviewAECL Nuclear ReviewNaN1929-80561929-6371Yespreserved2014-2015ISSN_192980562014 - v. 1 (1-2), 2014 - v. 2 (1-2), 2014 - v......NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
3AIP PublishingLow Temperature PhysicsNaN1063-777X1090-6517Yespreserved1997-2021ISSN_1063777X1997 - v. 23 (1-5, 7-12), 1998 - v. 24 (1-12),......NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4AIP PublishingPhysics of Fluids A: Fluid DynamicsNaN0899-8213NaNYespreserved1989-1993ISSN_089982131989 - v. 1 (1-12), 1990 - v. 2 (1-12), 1991 -......NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
35550Zeal Press Ltd.International Journal of Robotics and Automati...NaNNaN2409-9694NaNqueued-ISSN_24099694_1023-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
35551Zeal Press Ltd.Journal of Material Science and Technology Res...NaNNaN2410-4701NaNqueued-ISSN_24104701_1023-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
35552Zeal Press Ltd.Journal of Modern Mechanical Engineering and T...NaNNaN2409-9848NaNqueued-ISSN_24099848_1023-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
35553Zeal Press Ltd.Journal of Solar Energy Research UpdatesNaNNaN2410-2199NaNqueued-ISSN_24102199_1023-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
35554icddr,b (through 2015)Journal of Health, Population and Nutrition (J...NaN1606-0997NaNYespreserved2005-2015ISSN_160609972005 - v. 23 (3-4), 2006 - v. 24 (1-4), 2007 -......NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
-

35555 rows × 23 columns

-
- - - - -```python -# ajout ISSNL -portico['issn'] = portico['e-ISSN'] -portico.loc[portico['e-ISSN'].isna(), 'issn'] = portico['Print ISSN'] -portico -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PublisherTitleSocietyPrint ISSNe-ISSNPCAStatusYearsContentSet IdHoldings...Unnamed: 14Unnamed: 15Unnamed: 16Unnamed: 17Unnamed: 18Unnamed: 19Unnamed: 20Unnamed: 21Unnamed: 22issn
0ACI Information Group (through 2018)ACI Information GroupNaNNaN2374-1406Nopreserved2017-2018ACI Scholarly Blog Content2017 - v. 2017 (January-December), 2018 - v. 2......NaNNaNNaNNaNNaNNaNNaNNaNNaN2374-1406
1AECL Nuclear ReviewCNL Nuclear ReviewNaN2369-69312369-6923Yespreserved2016-2020ISSN_236969312016 - v. 5 (1-2), 2016/2017 - v. 6 (1-2), 201......NaNNaNNaNNaNNaNNaNNaNNaNNaN2369-6923
2AECL Nuclear ReviewAECL Nuclear ReviewNaN1929-80561929-6371Yespreserved2014-2015ISSN_192980562014 - v. 1 (1-2), 2014 - v. 2 (1-2), 2014 - v......NaNNaNNaNNaNNaNNaNNaNNaNNaN1929-6371
3AIP PublishingLow Temperature PhysicsNaN1063-777X1090-6517Yespreserved1997-2021ISSN_1063777X1997 - v. 23 (1-5, 7-12), 1998 - v. 24 (1-12),......NaNNaNNaNNaNNaNNaNNaNNaNNaN1090-6517
4AIP PublishingPhysics of Fluids A: Fluid DynamicsNaN0899-8213NaNYespreserved1989-1993ISSN_089982131989 - v. 1 (1-12), 1990 - v. 2 (1-12), 1991 -......NaNNaNNaNNaNNaNNaNNaNNaNNaN0899-8213
..................................................................
35550Zeal Press Ltd.International Journal of Robotics and Automati...NaNNaN2409-9694NaNqueued-ISSN_24099694_1023-...NaNNaNNaNNaNNaNNaNNaNNaNNaN2409-9694
35551Zeal Press Ltd.Journal of Material Science and Technology Res...NaNNaN2410-4701NaNqueued-ISSN_24104701_1023-...NaNNaNNaNNaNNaNNaNNaNNaNNaN2410-4701
35552Zeal Press Ltd.Journal of Modern Mechanical Engineering and T...NaNNaN2409-9848NaNqueued-ISSN_24099848_1023-...NaNNaNNaNNaNNaNNaNNaNNaNNaN2409-9848
35553Zeal Press Ltd.Journal of Solar Energy Research UpdatesNaNNaN2410-2199NaNqueued-ISSN_24102199_1023-...NaNNaNNaNNaNNaNNaNNaNNaNNaN2410-2199
35554icddr,b (through 2015)Journal of Health, Population and Nutrition (J...NaN1606-0997NaNYespreserved2005-2015ISSN_160609972005 - v. 23 (3-4), 2006 - v. 24 (1-4), 2007 -......NaNNaNNaNNaNNaNNaNNaNNaNNaN1606-0997
-

35555 rows × 24 columns

-
- - - - -```python -portico = pd.merge(portico, df_issnl, on='issn', how='left') -portico -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PublisherTitleSocietyPrint ISSNe-ISSNPCAStatusYearsContentSet IdHoldings...Unnamed: 15Unnamed: 16Unnamed: 17Unnamed: 18Unnamed: 19Unnamed: 20Unnamed: 21Unnamed: 22issnissnl
0ACI Information Group (through 2018)ACI Information GroupNaNNaN2374-1406Nopreserved2017-2018ACI Scholarly Blog Content2017 - v. 2017 (January-December), 2018 - v. 2......NaNNaNNaNNaNNaNNaNNaNNaN2374-14062374-1406
1AECL Nuclear ReviewCNL Nuclear ReviewNaN2369-69312369-6923Yespreserved2016-2020ISSN_236969312016 - v. 5 (1-2), 2016/2017 - v. 6 (1-2), 201......NaNNaNNaNNaNNaNNaNNaNNaN2369-6923NaN
2AECL Nuclear ReviewAECL Nuclear ReviewNaN1929-80561929-6371Yespreserved2014-2015ISSN_192980562014 - v. 1 (1-2), 2014 - v. 2 (1-2), 2014 - v......NaNNaNNaNNaNNaNNaNNaNNaN1929-63711929-8056
3AIP PublishingLow Temperature PhysicsNaN1063-777X1090-6517Yespreserved1997-2021ISSN_1063777X1997 - v. 23 (1-5, 7-12), 1998 - v. 24 (1-12),......NaNNaNNaNNaNNaNNaNNaNNaN1090-65171063-777X
4AIP PublishingPhysics of Fluids A: Fluid DynamicsNaN0899-8213NaNYespreserved1989-1993ISSN_089982131989 - v. 1 (1-12), 1990 - v. 2 (1-12), 1991 -......NaNNaNNaNNaNNaNNaNNaNNaN0899-82130899-8213
..................................................................
35550Zeal Press Ltd.International Journal of Robotics and Automati...NaNNaN2409-9694NaNqueued-ISSN_24099694_1023-...NaNNaNNaNNaNNaNNaNNaNNaN2409-96942409-9694
35551Zeal Press Ltd.Journal of Material Science and Technology Res...NaNNaN2410-4701NaNqueued-ISSN_24104701_1023-...NaNNaNNaNNaNNaNNaNNaNNaN2410-47012410-4701
35552Zeal Press Ltd.Journal of Modern Mechanical Engineering and T...NaNNaN2409-9848NaNqueued-ISSN_24099848_1023-...NaNNaNNaNNaNNaNNaNNaNNaN2409-98482409-9848
35553Zeal Press Ltd.Journal of Solar Energy Research UpdatesNaNNaN2410-2199NaNqueued-ISSN_24102199_1023-...NaNNaNNaNNaNNaNNaNNaNNaN2410-21992410-2199
35554icddr,b (through 2015)Journal of Health, Population and Nutrition (J...NaN1606-0997NaNYespreserved2005-2015ISSN_160609972005 - v. 23 (3-4), 2006 - v. 24 (1-4), 2007 -......NaNNaNNaNNaNNaNNaNNaNNaN1606-09971606-0997
-

35555 rows × 25 columns

-
- - - - -```python -portico.columns -``` - - - - - Index(['Publisher', 'Title', 'Society', 'Print ISSN', 'e-ISSN', 'PCA', - 'Status', 'Years', 'ContentSet Id', 'Holdings', 'Unnamed: 10', - 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13', 'Unnamed: 14', - 'Unnamed: 15', 'Unnamed: 16', 'Unnamed: 17', 'Unnamed: 18', - 'Unnamed: 19', 'Unnamed: 20', 'Unnamed: 21', 'Unnamed: 22', 'issn', - 'issnl'], - dtype='object') - - - - -```python -# test des lignes sans merge -portico.loc[portico['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PublisherTitleSocietyPrint ISSNe-ISSNPCAStatusYearsContentSet IdHoldings...Unnamed: 15Unnamed: 16Unnamed: 17Unnamed: 18Unnamed: 19Unnamed: 20Unnamed: 21Unnamed: 22issnissnl
1AECL Nuclear ReviewCNL Nuclear ReviewNaN2369-69312369-6923Yespreserved2016-2020ISSN_236969312016 - v. 5 (1-2), 2016/2017 - v. 6 (1-2), 201......NaNNaNNaNNaNNaNNaNNaNNaN2369-6923NaN
9AIP PublishingAPL BioengineeringNaNNaN2473-2877Yespreserved2017-2021ISSN_2473428772017 - v. 1 (1), 2018 - v. 2 (1-4), 2019 - v. ......NaNNaNNaNNaNNaNNaNNaNNaN2473-2877NaN
14AIP PublishingBiophysics ReviewsNaNNaN2688-4089Yespreserved2020-2021ISSN_26884089_152020 - v. 1 (1), 2021 - v. 2 (1)...NaNNaNNaNNaNNaNNaNNaNNaN2688-4089NaN
16AIP PublishingJournal of Undergraduate Reports in PhysicsNaNNaN2642-7451Yespreserved2018-2020ISSN_26427451_152018 - v. 28 (1), 2019 - v. 29 (1), 2020 - v. ......NaNNaNNaNNaNNaNNaNNaNNaN2642-7451NaN
20AIP PublishingNanotechnology and Precision EngineeringNaN1672-60302589-5540NaNpreserved2018-2021ISSN_16726030_152018 - v. 1 (1-4), 2019 - v. 2 (1-4), 2020 - v......NaNNaNNaNNaNNaNNaNNaNNaN2589-5540NaN
..................................................................
35539World ScientificDivision of Labor & Transaction CostsNaN0219-87111793-7000Nopreserved2005-2011ISSN_021987112005/2006 - v. 1 (1-2), 2006/2007 - v. 2 (1-2)......NaNNaNNaNNaNNaNNaNNaNNaN1793-7000NaN
35540World ScientificJournal of Medical Robotics ResearchNaN2424-905X2424-9068Nopreserved2016-2020ISSN_2424905X2016 - v. 1 (1-4), 2017 - v. 2 (1-4), 2018 - v......NaNNaNNaNNaNNaNNaNNaNNaN2424-9068NaN
35541World ScientificInternational Journal of Foundations of Comput...NaN0129-05411793-6373Nopreserved1990-2021ISSN_012905411990 - v. 1 (1-4), 1991 - v. 2 (1-4), 1992 - v......NaNNaNNaNNaNNaNNaNNaNNaN1793-6373NaN
35542World ScientificMolecular Frontiers JournalNaN2529-73252529-7333Nopreserved2017-2020ISSN_252973252017 - v. 1 (1-2, null), 2018 - v. 2 (1), 2019......NaNNaNNaNNaNNaNNaNNaNNaN2529-7333NaN
35543World ScientificWater Economics and PolicyNaN2382-624X2382-6258Nopreserved2015-2020ISSN_2382624X2015 - v. 1 (1-4), 2016 - v. 2 (1-4), 2017 - v......NaNNaNNaNNaNNaNNaNNaNNaN2382-6258NaN
-

4086 rows × 25 columns

-
- - - - -```python -# utiliser l'ISSN à la place sur ces lignes -portico.loc[portico['issnl'].isna(), 'issnl'] = portico['issn'] -``` - - -```python -# test des lignes sans merge -portico.loc[portico['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PublisherTitleSocietyPrint ISSNe-ISSNPCAStatusYearsContentSet IdHoldings...Unnamed: 15Unnamed: 16Unnamed: 17Unnamed: 18Unnamed: 19Unnamed: 20Unnamed: 21Unnamed: 22issnissnl
41ASTM InternationalASTM StandardsNaNNaNNaNYesqueued-ASTM Standards-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
58Academic JournalsInternational Journal of Vocational and Techni...NaNNaNNaNNaNqueued-ISSN_TBD70-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
78Academic JournalsJournal of Metabolomics and Systems BiologyNaNNaNNaNNaNqueued-ISSN_TBD68-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
180Academy of ResearchThe Microfinance JournalNaNNaNNaNNaNqueued-TBD_MJ_1242-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
254African Online Scientific Information Systems ...Journal of African ForesightNaNNaNNaNNaNqueued-ISSN_TBD288-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
34911Wolters Kluwer HealthAJSP OpenNaNNaNNaNYesqueued-TBD_74_1-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
34915Wolters Kluwer HealthAnnals of Surgery OANaNNaNNaNYesqueued-TBD_74_2-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
35047Wolters Kluwer HealthOtology & Neurotology OpenNaNNaNNaNYesqueued-TBD_ONO_74-...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
35058Wolters Kluwer HealthNorthwest Journal of OptometryNaNNaNNaNYespreserved1924-1925NJO_74v.1(1-12),v.2(1-7)...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
35209Wolters Kluwer HealthOccupational Therapy & RehabilitationNaNNaNNaNYespreserved1925-1951OTR_74v.22(1-6),v.23(1-6),v.24(1-6),v.25(1-6),v.26(1......NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
-

300 rows × 25 columns

-
- - - - -```python -# ajout des infos de Portico : -# Status -portico_for_merge = portico[['issnl', 'Status']] -portico_for_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnlStatus
02374-1406preserved
12369-6923preserved
21929-8056preserved
31063-777Xpreserved
40899-8213preserved
.........
355502409-9694queued
355512410-4701queued
355522409-9848queued
355532410-2199queued
355541606-0997preserved
-

35555 rows × 2 columns

-
- - - - -```python -# garder les lignes "preserved" -portico_for_merge = portico_for_merge.loc[portico_for_merge['Status'] == 'preserved'] -portico_for_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnlStatus
02374-1406preserved
12369-6923preserved
21929-8056preserved
31063-777Xpreserved
40899-8213preserved
.........
355462572-5505preserved
355472225-0719preserved
355482472-0712preserved
355492377-231Xpreserved
355541606-0997preserved
-

33177 rows × 2 columns

-
- - - - -```python -# renommer les colonnes -portico_for_merge = portico_for_merge.rename(columns={'Status' : 'portico_status'}) -portico_for_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnlportico_status
02374-1406preserved
12369-6923preserved
21929-8056preserved
31063-777Xpreserved
40899-8213preserved
.........
355462572-5505preserved
355472225-0719preserved
355482472-0712preserved
355492377-231Xpreserved
355541606-0997preserved
-

33177 rows × 2 columns

-
- - - - -```python -# merge avec journals -journals = pd.merge(journals, portico_for_merge, on='issnl', how='left') -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_titlelockssportico_status
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN0.0NaN
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0preserved
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1Yes1.0PLoS One1.0NaN
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN0.0NaN
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN0.0NaN0.0preserved
......................................................
10779980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN0.0preserved
10789991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved
10799991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved
108010000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN0.0preserved
108110010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN0.0NaN
-

1082 rows × 17 columns

-
- - - - -```python -# suppression des doublons -journals = journals.drop_duplicates(subset=['id']) -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_titlelockssportico_status
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN0.0NaN
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0preserved
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1Yes1.0PLoS One1.0NaN
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN0.0NaN
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN0.0NaN0.0preserved
......................................................
10769970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234NaNNaNNaN0.0NaN0.0preserved
10779980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN0.0preserved
10789991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved
108010000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN0.0preserved
108110010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN0.0NaN
-

996 rows × 17 columns

-
- - - - -```python -# ajouter info sur la presence sur portico -journals.loc[journals['portico_status'].isna(), 'portico'] = 0 -journals.loc[~journals['portico_status'].isna(), 'portico'] = 1 -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_titlelockssportico_statusportico
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN0.0NaN0.0
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1Yes1.0PLoS One1.0NaN0.0
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN0.0NaN0.0
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN0.0NaN0.0preserved1.0
.........................................................
10769970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234NaNNaNNaN0.0NaN0.0preserved1.0
10779980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0
10789991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved1.0
108010000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN0.0preserved1.0
108110010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN0.0NaN0.0
-

996 rows × 18 columns

-
- - - -### Licences Nationales - - -```python -# ajout des infos de preservation des Licences nationales -nlch1 = pd.read_excel('licences_nationales/cambridge_Switzerland_NationalLicences_2020-08-17.xlsx') -nlch1 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publication_titleprint_identifieronline_identifierdate_first_issue_onlinenum_first_vol_onlinenum_first_issue_onlinedate_last_issue_onlinenum_last_vol_onlinenum_last_issue_onlinetitle_url...publisher_namepublication_typedate_monograph_published_printdate_monograph_published_onlinemonograph_volumemonograph_editionfirst_editorparent_publication_title_idpreceding_publication_title_idaccess_type
0Journal of Agricultural and Applied Economics1074-0708NaN19691.0NaN201547.0NaNhttp://www.cambridge.org/core/product/identifi......Cambridge University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
1Advances in Applied Mathematics and Mechanics2070-07332075-135420113.0NaN20158.0NaNhttp://www.cambridge.org/core/product/identifi......Cambridge University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
2Annals of Actuarial Science1748-49951748-500220061.0NaN20159.0NaNhttp://www.cambridge.org/core/product/identifi......Cambridge University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
3Advances in Animal Biosciences2040-47002040-471920101.0NaN20156.0NaNhttp://www.cambridge.org/core/product/identifi......Cambridge University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
4Archaeologia0261-3409NaN17701.0NaN1992110.0NaNhttp://www.cambridge.org/core/product/identifi......Cambridge University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
389Zygote0967-19941469-873019931.0NaN201523.0NaNhttp://www.cambridge.org/core/product/identifi......Cambridge University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
390Political Analysis1047-19871476-498919891.0NaN201523.0NaNhttp://www.cambridge.org/core/product/identifi......Cambridge University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
391Business and Politics1369-52581469-356919991.0NaN201517.0NaNhttp://www.cambridge.org/core/product/identifi......Cambridge University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
392Transactions of the Institute of Actuaries2047-28382398-738318491.0NaN18521.0NaNhttp://www.cambridge.org/core/product/identifi......Cambridge University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
393Transactions of the International Astronomical...NaN0251-107X19221.01.0200725.02.0https://www.cambridge.org/core/journals/procee......Cambridge University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
-

394 rows × 25 columns

-
- - - - -```python -# ajout des infos de preservation des Licences nationales -nlch2 = pd.read_excel('licences_nationales/gruyter_Switzerland_NationalLicences_2020-11-30.xlsx') -nlch2 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publication_titleprint_identifieronline_identifierdate_first_issue_onlinenum_first_vol_onlinenum_first_issue_onlinedate_last_issue_onlinenum_last_vol_onlinenum_last_issue_onlinetitle_url...publisher_namepublication_typedate_monograph_published_printdate_monograph_published_onlinemonograph_volumemonograph_editionfirst_editorparent_publication_title_idpreceding_publication_title_idaccess_type
0ABI Technik0720-67632191-4664199616NaN201737.0NaNhttps://www.degruyter.com/openurl?genre=journa......De GruyterserialNaNNaNNaNNaNNaNNaNNaNP
1Accounting, Economics, and Law: A Convivium2194-60512152-282020111NaN20177.0NaNhttps://www.degruyter.com/openurl?genre=journa......De GruyterserialNaNNaNNaNNaNNaNNaNNaNP
2Advanced Optical Technologies2192-85762192-858420121NaN20176.0NaNhttps://www.degruyter.com/openurl?genre=journa......De GruyterserialNaNNaNNaNNaNNaNNaNNaNP
3Advances in Calculus of Variations1864-82581864-826620081NaN201710.0NaNhttps://www.degruyter.com/openurl?genre=journa......De GruyterserialNaNNaNNaNNaNNaNNaNNaNP
4Advances in Geometry1615-715X1615-716820011NaN201717.0NaNhttps://www.degruyter.com/openurl?genre=journa......De GruyterserialNaNNaNNaNNaNNaNNaNNaNP
..................................................................
339Zeitschrift für Religionswissenschaft0943-86102194-508X19931NaN201725.0NaNhttps://www.degruyter.com/openurl?genre=journa......De GruyterserialNaNNaNNaNNaNNaNNaNNaNP
340Zeitschrift für romanische Philologie0049-86611865-906318771NaN2017133.0NaNhttps://www.degruyter.com/openurl?genre=journa......De GruyterserialNaNNaNNaNNaNNaNNaNNaNP
341Zeitschrift für Slawistik0044-35062196-701619561NaN201762.0NaNhttps://www.degruyter.com/openurl?genre=journa......De Gruyter (A)serialNaNNaNNaNNaNNaNNaNNaNP
342Zeitschrift für Sprachwissenschaft0721-90671613-370619821NaN201736.0NaNhttps://www.degruyter.com/openurl?genre=journa......De GruyterserialNaNNaNNaNNaNNaNNaNNaNF
343Zeitschrift für Unternehmens- und Gesellschaft...0340-24791612-704819721NaN201746.0NaNhttps://www.degruyter.com/openurl?genre=journa......De GruyterserialNaNNaNNaNNaNNaNNaNNaNP
-

344 rows × 25 columns

-
- - - - -```python -# ajout des infos de preservation des Licences nationales -nlch3 = pd.read_excel('licences_nationales/oxford_Switzerland_NationalLicences_2020-09-24.xlsx') -nlch3 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publication_titleprint_identifieronline_identifierdate_first_issue_onlinenum_first_vol_onlinenum_first_issue_onlinedate_last_issue_onlinenum_last_vol_onlinenum_last_issue_onlinetitle_url...publisher_namepublication_typedate_monograph_published_printdate_monograph_published_onlinemonograph_volumemonograph_editionfirst_editorparent_publication_title_idpreceding_publication_title_idaccess_type
0Acta Biochimica et Biophysica Sinica1672-91451745-7270201547.0NaN2018NaNNaNhttps://academic.oup.com/abbs...Oxford University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
1Archives of Clinical Neuropsychology0887-61771873-584319861.0NaN2018NaNNaNhttps://academic.oup.com/acn...Oxford University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
2Adaptation1755-06371755-064520158.0NaN2018NaNNaNhttps://academic.oup.com/adaptation...Oxford University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
3American Entomologist1046-28212155-9902199036.0NaN2018NaNNaNhttps://academic.oup.com/ae...Oxford University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
4Applied Economic Perspectives and Policy1058-71951467-935319881.0NaN2018NaNNaNhttps://academic.oup.com/aepp...Oxford University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
343The Chinese Journal of Comparative Law2050-48022050-481020186.0NaN2018NaNNaNhttps://academic.oup.com/cjcl...Oxford University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
344Journal of Nutrition0022-31661541-61002018148.0NaN2018NaNNaNhttps://academic.oup.com/jn...Oxford University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
345Translational Behavioral Medicine1869-67161613-986020188.0NaN2018NaNNaNhttps://academic.oup.com/tbm...Oxford University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
346The Western Historical Quarterly0043-38101939-8603201647.0NaN2018NaNNaNhttps://academic.oup.com/whq...Oxford University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
347Zoological Journal of the Linnean Society0024-40821096-36422017179.0NaN2018NaNNaNhttps://academic.oup.com/zoolinnean...Oxford University PressserialNaNNaNNaNNaNNaNNaNNaNNaN
-

348 rows × 25 columns

-
- - - - -```python -# ajout des infos de preservation des Licences nationales -nlch4 = pd.read_excel('licences_nationales/springer_Switzerland_NationalLicences_2020-08-12.xlsx') -nlch4 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publication_titleprint_identifieronline_identifierdate_first_issue_onlinenum_first_vol_onlinenum_first_issue_onlinedate_last_issue_onlinenum_last_vol_onlinenum_last_issue_onlinetitle_url...coverage_notespublisher_namepublication_typedate_monograph_published_printdate_monograph_published_onlinemonograph_volumemonograph_editionfirst_editorparent_publication_title_idpreceding_publication_title_id
04OR1619-45001614-241120053.01.02015NaNNaNhttp://link.springer.com/journal/10288...NaNSpringer Berlin HeidelbergSerialNaNNaNNaNNaNNaNNaNNaN
1AAPS PharmSciTechNaN1530-993220056.01.02015NaNNaNhttp://link.springer.com/journal/12249...NaNSpringer USSerialNaNNaNNaNNaNNaNNaNNaN
2ADHD Attention Deficit and Hyperactivity Disor...1866-61161866-664720091.01.02014NaNNaNhttp://link.springer.com/journal/12402...NaNSpringer ViennaSerialNaNNaNNaNNaNNaNNaNNaN
3AI & SOCIETY0951-56661435-565519871.01.02015NaNNaNhttp://link.springer.com/journal/146...NaNSpringer LondonSerialNaNNaNNaNNaNNaNNaNNaN
4AIDS and Behavior1090-71651573-325420059.01.02015NaNNaNhttp://link.springer.com/journal/10461...NaNSpringer USSerialNaNNaNNaNNaNNaNNaNNaN
..................................................................
1667neurogenetics1364-67451364-675320056.01.02015NaNNaNhttp://link.springer.com/journal/10048...NaNSpringer Berlin HeidelbergSerialNaNNaNNaNNaNNaNNaNNaN
1668uwf UmweltWirtschaftsForum | Sustainability Ma...0943-34811432-2293200715.01.02015NaNNaNhttp://link.springer.com/journal/550...NaNSpringer Berlin HeidelbergSerialNaNNaNNaNNaNNaNNaNNaN
1669Österreichische Wasser- und Abfallwirtschaft0945-358X1613-7566200557.01.02015NaNNaNhttp://link.springer.com/journal/506...NaNSpringer ViennaSerialNaNNaNNaNNaNNaNNaNNaN
1670Österreichische Zeitschrift für Soziologie1011-00701862-2585200530.01.02015NaNNaNhttp://link.springer.com/journal/11614...NaNSpringer Fachmedien WiesbadenSerialNaNNaNNaNNaNNaNNaNNaN
1671Journal Applied Mathematics Computing1598-58651865-20851905NaNNaN1905NaNNaNhttp://link.springer.com/journal/12190...NaNSpringerSerialNaNNaNNaNNaNNaNNaNNaN
-

1672 rows × 24 columns

-
- - - - -```python -# concatener les 4 -nlch = pd.concat([nlch1, nlch2, nlch3, nlch4], ignore_index=True) -nlch -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version - of pandas will change to not sort by default. - - To accept the future behavior, pass 'sort=False'. - - To retain the current behavior and silence the warning, pass 'sort=True'. - - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
access_typecoverage_depthcoverage_notesdate_first_issue_onlinedate_last_issue_onlinedate_monograph_published_onlinedate_monograph_published_printembargo_infofirst_authorfirst_editor...num_last_vol_onlineonline_identifierparent_publication_title_idpreceding_publication_title_idprint_identifierpublication_titlepublication_typepublisher_nametitle_idtitle_url
0NaNfulltextNaN19692015NaNNaNNaNNaNNaN...47.0NaNNaNNaN1074-0708Journal of Agricultural and Applied EconomicsserialCambridge University Pressaaehttp://www.cambridge.org/core/product/identifi...
1NaNfulltextNaN20112015NaNNaNNaNNaNNaN...8.02075-1354NaNNaN2070-0733Advances in Applied Mathematics and MechanicsserialCambridge University Pressaamhttp://www.cambridge.org/core/product/identifi...
2NaNfulltextNaN20062015NaNNaNNaNNaNNaN...9.01748-5002NaNNaN1748-4995Annals of Actuarial ScienceserialCambridge University Pressaashttp://www.cambridge.org/core/product/identifi...
3NaNfulltextNaN20102015NaNNaNNaNNaNNaN...6.02040-4719NaNNaN2040-4700Advances in Animal BiosciencesserialCambridge University Pressabshttp://www.cambridge.org/core/product/identifi...
4NaNfulltextNaN17701992NaNNaNNaNNaNNaN...110.0NaNNaNNaN0261-3409ArchaeologiaserialCambridge University Pressachhttp://www.cambridge.org/core/product/identifi...
..................................................................
2753NaNfulltextNaN20052015NaNNaNNaNNaNNaN...NaN1364-6753NaNNaN1364-6745neurogeneticsSerialSpringer Berlin Heidelberg10048http://link.springer.com/journal/10048
2754NaNfulltextNaN20072015NaNNaNNaNNaNNaN...NaN1432-2293NaNNaN0943-3481uwf UmweltWirtschaftsForum | Sustainability Ma...SerialSpringer Berlin Heidelberg550http://link.springer.com/journal/550
2755NaNfulltextNaN20052015NaNNaNNaNNaNNaN...NaN1613-7566NaNNaN0945-358XÖsterreichische Wasser- und AbfallwirtschaftSerialSpringer Vienna506http://link.springer.com/journal/506
2756NaNfulltextNaN20052015NaNNaNNaNNaNNaN...NaN1862-2585NaNNaN1011-0070Österreichische Zeitschrift für SoziologieSerialSpringer Fachmedien Wiesbaden11614http://link.springer.com/journal/11614
2757NaNfulltextNaN19051905NaNNaNNaNNaNNaN...NaN1865-2085NaNNaN1598-5865Journal Applied Mathematics ComputingSerialSpringer12190http://link.springer.com/journal/12190
-

2758 rows × 26 columns

-
- - - - -```python -nlch.columns -``` - - - - - Index(['access_type', 'coverage_depth', 'coverage_notes', - 'date_first_issue_online', 'date_last_issue_online', - 'date_monograph_published_online', 'date_monograph_published_print', - 'embargo_info', 'first_author', 'first_editor', 'monograph_edition', - 'monograph_volume', 'notes', 'num_first_issue_online', - 'num_first_vol_online', 'num_last_issue_online', 'num_last_vol_online', - 'online_identifier', 'parent_publication_title_id', - 'preceding_publication_title_id', 'print_identifier', - 'publication_title', 'publication_type', 'publisher_name', 'title_id', - 'title_url'], - dtype='object') - - - - -```python -# ajout ISSNL -nlch['issn'] = nlch['online_identifier'] -nlch.loc[nlch['online_identifier'].isna(), 'issn'] = nlch['print_identifier'] -nlch -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
access_typecoverage_depthcoverage_notesdate_first_issue_onlinedate_last_issue_onlinedate_monograph_published_onlinedate_monograph_published_printembargo_infofirst_authorfirst_editor...online_identifierparent_publication_title_idpreceding_publication_title_idprint_identifierpublication_titlepublication_typepublisher_nametitle_idtitle_urlissn
0NaNfulltextNaN19692015NaNNaNNaNNaNNaN...NaNNaNNaN1074-0708Journal of Agricultural and Applied EconomicsserialCambridge University Pressaaehttp://www.cambridge.org/core/product/identifi...1074-0708
1NaNfulltextNaN20112015NaNNaNNaNNaNNaN...2075-1354NaNNaN2070-0733Advances in Applied Mathematics and MechanicsserialCambridge University Pressaamhttp://www.cambridge.org/core/product/identifi...2075-1354
2NaNfulltextNaN20062015NaNNaNNaNNaNNaN...1748-5002NaNNaN1748-4995Annals of Actuarial ScienceserialCambridge University Pressaashttp://www.cambridge.org/core/product/identifi...1748-5002
3NaNfulltextNaN20102015NaNNaNNaNNaNNaN...2040-4719NaNNaN2040-4700Advances in Animal BiosciencesserialCambridge University Pressabshttp://www.cambridge.org/core/product/identifi...2040-4719
4NaNfulltextNaN17701992NaNNaNNaNNaNNaN...NaNNaNNaN0261-3409ArchaeologiaserialCambridge University Pressachhttp://www.cambridge.org/core/product/identifi...0261-3409
..................................................................
2753NaNfulltextNaN20052015NaNNaNNaNNaNNaN...1364-6753NaNNaN1364-6745neurogeneticsSerialSpringer Berlin Heidelberg10048http://link.springer.com/journal/100481364-6753
2754NaNfulltextNaN20072015NaNNaNNaNNaNNaN...1432-2293NaNNaN0943-3481uwf UmweltWirtschaftsForum | Sustainability Ma...SerialSpringer Berlin Heidelberg550http://link.springer.com/journal/5501432-2293
2755NaNfulltextNaN20052015NaNNaNNaNNaNNaN...1613-7566NaNNaN0945-358XÖsterreichische Wasser- und AbfallwirtschaftSerialSpringer Vienna506http://link.springer.com/journal/5061613-7566
2756NaNfulltextNaN20052015NaNNaNNaNNaNNaN...1862-2585NaNNaN1011-0070Österreichische Zeitschrift für SoziologieSerialSpringer Fachmedien Wiesbaden11614http://link.springer.com/journal/116141862-2585
2757NaNfulltextNaN19051905NaNNaNNaNNaNNaN...1865-2085NaNNaN1598-5865Journal Applied Mathematics ComputingSerialSpringer12190http://link.springer.com/journal/121901865-2085
-

2758 rows × 27 columns

-
- - - - -```python -nlch = pd.merge(nlch, df_issnl, on='issn', how='left') -nlch -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
access_typecoverage_depthcoverage_notesdate_first_issue_onlinedate_last_issue_onlinedate_monograph_published_onlinedate_monograph_published_printembargo_infofirst_authorfirst_editor...parent_publication_title_idpreceding_publication_title_idprint_identifierpublication_titlepublication_typepublisher_nametitle_idtitle_urlissnissnl
0NaNfulltextNaN19692015NaNNaNNaNNaNNaN...NaNNaN1074-0708Journal of Agricultural and Applied EconomicsserialCambridge University Pressaaehttp://www.cambridge.org/core/product/identifi...1074-07081074-0708
1NaNfulltextNaN20112015NaNNaNNaNNaNNaN...NaNNaN2070-0733Advances in Applied Mathematics and MechanicsserialCambridge University Pressaamhttp://www.cambridge.org/core/product/identifi...2075-13542070-0733
2NaNfulltextNaN20062015NaNNaNNaNNaNNaN...NaNNaN1748-4995Annals of Actuarial ScienceserialCambridge University Pressaashttp://www.cambridge.org/core/product/identifi...1748-50021748-4995
3NaNfulltextNaN20102015NaNNaNNaNNaNNaN...NaNNaN2040-4700Advances in Animal BiosciencesserialCambridge University Pressabshttp://www.cambridge.org/core/product/identifi...2040-47192040-4700
4NaNfulltextNaN17701992NaNNaNNaNNaNNaN...NaNNaN0261-3409ArchaeologiaserialCambridge University Pressachhttp://www.cambridge.org/core/product/identifi...0261-34090261-3409
..................................................................
2753NaNfulltextNaN20052015NaNNaNNaNNaNNaN...NaNNaN1364-6745neurogeneticsSerialSpringer Berlin Heidelberg10048http://link.springer.com/journal/100481364-67531364-6745
2754NaNfulltextNaN20072015NaNNaNNaNNaNNaN...NaNNaN0943-3481uwf UmweltWirtschaftsForum | Sustainability Ma...SerialSpringer Berlin Heidelberg550http://link.springer.com/journal/5501432-22930943-3481
2755NaNfulltextNaN20052015NaNNaNNaNNaNNaN...NaNNaN0945-358XÖsterreichische Wasser- und AbfallwirtschaftSerialSpringer Vienna506http://link.springer.com/journal/5061613-75660945-358X
2756NaNfulltextNaN20052015NaNNaNNaNNaNNaN...NaNNaN1011-0070Österreichische Zeitschrift für SoziologieSerialSpringer Fachmedien Wiesbaden11614http://link.springer.com/journal/116141862-25851011-0070
2757NaNfulltextNaN19051905NaNNaNNaNNaNNaN...NaNNaN1598-5865Journal Applied Mathematics ComputingSerialSpringer12190http://link.springer.com/journal/121901865-20851598-5865
-

2758 rows × 28 columns

-
- - - - -```python -# test des lignes sans merge -nlch.loc[nlch['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
access_typecoverage_depthcoverage_notesdate_first_issue_onlinedate_last_issue_onlinedate_monograph_published_onlinedate_monograph_published_printembargo_infofirst_authorfirst_editor...parent_publication_title_idpreceding_publication_title_idprint_identifierpublication_titlepublication_typepublisher_nametitle_idtitle_urlissnissnl
37NaNfulltextNaN19592006NaNNaNNaNNaNNaN...NaNNaN1357-7298Animal scienceserialCambridge University Pressaschttp://www.cambridge.org/core/product/identifi...1748-748XNaN
52NaNfulltextNaN19572015NaNNaNNaNNaNNaN...NaNNaN2055-7973British Catholic HistoryserialCambridge University Pressbchhttp://www.cambridge.org/core/product/identifi...2055-7981NaN
76NaNfulltextNaN18822015NaNNaNNaNNaNNaN...NaNNaN1750-2705Cambridge Classical JournalserialCambridge University Pressccjhttp://www.cambridge.org/core/product/identifi...2047-993XNaN
110NaNfulltextNaN20112015NaNNaNNaNNaNNaN...NaNNaN2079-7362East Asian Journal on Applied MathematicsserialCambridge University Presseamhttp://www.cambridge.org/core/product/identifi...2079-7370NaN
152NaNfulltextNaN19802015NaNNaNNaNNaNNaN...NaNNaN2051-5367Hegel BulletinserialCambridge University Presshglhttp://www.cambridge.org/core/product/identifi...2051-5375NaN
194NaNfulltextNaN19912015NaNNaNNaNNaNNaN...NaNNaN2055-6365Journal of Psychologists and Counsellors in Sc...serialCambridge University Pressjgchttp://www.cambridge.org/core/product/identifi...2055-6373NaN
200NaNfulltextNaN19111993NaNNaNNaNNaNNaN...NaNNaN2049-9299Journal of the Staple Inn Actuarial SocietyserialCambridge University Pressjishttp://www.cambridge.org/core/product/identifi...2059-6162NaN
267NaNfulltextNaN20092015NaNNaNNaNNaNNaN...NaNNaN0016-7746Netherlands Journal of Geosciences / Geologie ...serialCambridge University Pressnjghttp://www.cambridge.org/core/product/identifi...1573-9708NaN
278NaNfulltextNaN20082015NaNNaNNaNNaNNaN...NaNNaNNaNAustralasian Journal of Organisational PsychologyserialCambridge University Pressorphttp://www.cambridge.org/core/product/identifi...2054-2232NaN
375NaNfulltextNaN17882015NaNNaNNaNNaNNaN...NaNNaN1755-6910Earth and environmental science transactions o...serialRoyal Society of Edinburgh Scotland Foundationtrehttp://www.cambridge.org/core/product/identifi...1755-6929NaN
405PfulltextNaN18552017NaNNaNNaNNaNNaN...NaNNaN0341-289XAnnalen des Historischen Vereins für den Niede...yearbookBöhlau Verlag2194-3818https://www.degruyter.com/openurl?genre=journa...2194-3818NaN
411PfulltextNaN19552017NaNNaNNaNNaNNaN...NaNNaN0066-6297Archiv für Diplomatik, Schriftgeschichte, Sieg...yearbookBöhlau Verlag2194-5020https://www.degruyter.com/openurl?genre=journa...2194-5020NaN
413PfulltextNaN19032017NaNNaNNaNNaNNaN...NaNNaN0003-9233Archiv für KulturgeschichteserialBöhlau Verlag2194-3958https://www.degruyter.com/openurl?genre=journa...2194-3958NaN
418PfulltextNaN18762017NaNNaNNaNNaNNaN...NaNNaN0003-9497Archivalische ZeitschriftserialBöhlau Verlag2194-3826https://www.degruyter.com/openurl?genre=journa...2194-3826NaN
427PfulltextNaN19482017NaNNaNNaNNaNNaN...NaNNaN0006-2456Bildung und ErziehungserialBöhlau Verlag2194-3834https://www.degruyter.com/openurl?genre=journa...2194-3834NaN
458PfulltextNaN18672017NaNNaNNaNNaNNaN...NaNNaN0070-444XDeutsches Dante-JahrbuchyearbookDe Gruyter2194-4059https://www.degruyter.com/openurl?genre=journa...2194-4059NaN
468PfulltextNaN19942017NaNNaNNaNNaNNaN...NaNNaN2566-9095Etruscan and Italic StudiesserialDe Gruyter2566-9109https://www.degruyter.com/openurl?genre=journa...2566-9109NaN
479PfulltextNaN20052017NaNNaNNaNNaNNaN...NaNNaN2567-4765FinanzRundschauserialVerlag Dr. Otto Schmidt2567-4897https://www.degruyter.com/openurl?genre=journa...2567-4897NaN
530PfulltextNaN19692017NaNNaNNaNNaNNaN...NaNNaN0074-9818Internationales Jahrbuch der ErwachsenenbildungyearbookBöhlau Verlag2194-3699https://www.degruyter.com/openurl?genre=journa...2194-3699NaN
537PfulltextNaN19122017NaNNaNNaNNaNNaN...NaNNaN0341-9320Jahrbuch des Kölnischen GeschichtsvereinsyearbookBöhlau Verlag2198-0675https://www.degruyter.com/openurl?genre=journa...2198-0675NaN
561PfulltextNaN20122017NaNNaNNaNNaNNaN...NaNNaN2194-6345Journal of Econometric MethodsserialDe Gruyter2156-6674https://www.degruyter.com/openurl?genre=journa...2156-6674NaN
570FfulltextNaN19772017NaNNaNNaNNaNNaN...NaNNaN2567-9430Journal of Laboratory MedicineserialDe Gruyter2567-9449https://www.degruyter.com/openurl?genre=journa...2567-9449NaN
675PfulltextNaN19502017NaNNaNNaNNaNNaN...NaNNaN0080-5319SaeculumserialBöhlau Verlag2194-4075https://www.degruyter.com/openurl?genre=journa...2194-4075NaN
708PfulltextNaN20052017NaNNaNNaNNaNNaN...NaNNaN2363-4774World Political ScienceserialDe Gruyter2363-4782https://www.degruyter.com/openurl?genre=journa...2363-4782NaN
709PfulltextNaN20142017NaNNaNNaNNaNNaN...NaNNaN2196-6249Yearbook for European Jewish Literature StudiesyearbookDe Gruyter2196-6257https://www.degruyter.com/openurl?genre=journa...2196-6257NaN
712PfulltextNaN18612017NaNNaNNaNNaNNaN...NaNNaNNaNZeitschrift der Savigny-Stiftung für Rechtsges...serialNaNNaNhttps://www.degruyter.com/openurl?genre=journa...NaNNaN
713PfulltextNaN19112017NaNNaNNaNNaNNaN...NaNNaNNaNZeitschrift der Savigny-Stiftung für Rechtsges...serialNaNNaNhttps://www.degruyter.com/openurl?genre=journa...NaNNaN
714PfulltextNaN18802017NaNNaNNaNNaNNaN...NaNNaNNaNZeitschrift der Savigny-Stiftung für Rechtsges...serialNaNNaNhttps://www.degruyter.com/openurl?genre=journa...NaNNaN
766NaNfulltextNaN20152018NaNNaNNaNNaNNaN...NaNNaN2041-2649Briefings in Functional GenomicsserialOxford University Pressbfgphttps://academic.oup.com/bfgp2041-2647NaN
890NaNfulltextNaN19222018NaNNaNNaNNaNNaN...NaNNaN0021-924XThe Journal of BiochemistryserialOxford University Pressjbchemhttps://academic.oup.com/jb-NaN
926NaNfulltextNaN18892018NaNNaNNaNNaNNaN...NaNNaN0024-2160The LibraryserialOxford University Presslibrajhttps://academic.oup.com/library-NaN
1010NaNfulltextNaN19771992NaNNaNNaNNaNNaN...NaNNaN0148-0847Social Work Research and AbstractsserialOxford University Pressswrahttps://academic.oup.com/swra1001-3412NaN
1057NaNfulltextNaN20172018NaNNaNNaNNaNNaN...NaNNaN0021-972XThe Journal of Clinical Endocrinology & Metabo...serialOxford University Pressjcemhttps://academic.oup.com/jcem1845-7197NaN
1074NaNfulltextNaN20182018NaNNaNNaNNaNNaN...NaNNaN2398-4910Perspectives on Public Management and GovernanceserialOxford University Pressppmghttps://academic.oup.com/ppmg2398-4929NaN
1094NaNfulltextNaN19762015NaNNaNNaNNaNNaN...NaNNaN2366-004XAbdominal RadiologySerialSpringer US261http://link.springer.com/journal/2612366-0058NaN
1105NaNfulltextNaN19821985NaNNaNNaNNaNNaN...NaNNaN0253-486XGeochemistrySerialScience Press11631http://link.springer.com/journal/11631NaN
1148NaNfulltextNaN19752004NaNNaNNaNNaNNaN...NaNNaN1066-2316American Journal of Criminal JusticeSerialSpringer US12103http://link.springer.com/journal/121031936-1351NaN
1218NaNfulltextNaN20062015NaNNaNNaNNaNNaN...NaNNaN1862-3522Archives of OsteoporosisSerialSpringer London11657http://link.springer.com/journal/116571862-3514NaN
1363NaNfulltextNaN19952002NaNNaNNaNNaNNaN...NaNNaN1006-6497Chinese journal of integrated traditional and ...SerialSpringer Berlin Heidelberg11655http://link.springer.com/journal/11655NaN
1365NaNfulltextNaN20092015NaNNaNNaNNaNNaN...NaNNaN0256-7679Chinese Journal of Polymer ScienceSerialChinese Chemical Society and Institute of Chem...10118http://link.springer.com/journal/101181439-6203NaN
1382NaNfulltextNaN19831994NaNNaNNaNNaNNaN...NaNNaN0731-8235Clinical reviews in allergySerialSpringer US12016http://link.springer.com/journal/12016NaN
1383NaNfulltextNaN19822015NaNNaNNaNNaNNaN...NaNNaN0770-3198Clinical RheumatologySerialSpringer London10067http://link.springer.com/journal/100671434-9949NaN
1938NaNfulltextNaN20082015NaNNaNNaNNaNNaN...NaNNaN1936-1521Journal of Child & Adolescent TraumaSerialSpringer International Publishing40653http://link.springer.com/journal/406531936-153XNaN
2003NaNfulltextNaN19862015NaNNaNNaNNaNNaN...NaNNaN0884-8734Journal of General Internal MedicineSerialSpringer US11606http://link.springer.com/journal/116061525-1497NaN
2136NaNfulltextNaN20062015NaNNaNNaNNaNNaN...NaNNaN1009-6124Journal of Systems Science and ComplexitySerialAcademy of Mathematics and Systems Science, Ch...11424http://link.springer.com/journal/114241559-7067NaN
2255NaNfulltextNaN19742015NaNNaNNaNNaNNaN...NaNNaN0095-3628Microbial EcologySerialSpringer US248http://link.springer.com/journal/2481432-184XNaN
2355NaNfulltextNaN19921995NaNNaNNaNNaNNaN...NaNNaN0941-2530Orthopedics and TraumatologySerialUrban & Vogel65http://link.springer.com/journal/651617-3838NaN
2674NaNfulltextNaN18831887NaNNaNNaNNaNNaN...NaNNaNNaNTransactions of the Academy of Medicine in Ire...SerialSpringer-Verlag12680http://link.springer.com/journal/12680NaNNaN
-

48 rows × 28 columns

-
- - - - -```python -# utiliser l'ISSN à la place sur ces lignes -nlch.loc[nlch['issnl'].isna(), 'issnl'] = nlch['issn'] -``` - - -```python -# test des lignes sans merge -nlch.loc[nlch['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
access_typecoverage_depthcoverage_notesdate_first_issue_onlinedate_last_issue_onlinedate_monograph_published_onlinedate_monograph_published_printembargo_infofirst_authorfirst_editor...parent_publication_title_idpreceding_publication_title_idprint_identifierpublication_titlepublication_typepublisher_nametitle_idtitle_urlissnissnl
712PfulltextNaN18612017NaNNaNNaNNaNNaN...NaNNaNNaNZeitschrift der Savigny-Stiftung für Rechtsges...serialNaNNaNhttps://www.degruyter.com/openurl?genre=journa...NaNNaN
713PfulltextNaN19112017NaNNaNNaNNaNNaN...NaNNaNNaNZeitschrift der Savigny-Stiftung für Rechtsges...serialNaNNaNhttps://www.degruyter.com/openurl?genre=journa...NaNNaN
714PfulltextNaN18802017NaNNaNNaNNaNNaN...NaNNaNNaNZeitschrift der Savigny-Stiftung für Rechtsges...serialNaNNaNhttps://www.degruyter.com/openurl?genre=journa...NaNNaN
2674NaNfulltextNaN18831887NaNNaNNaNNaNNaN...NaNNaNNaNTransactions of the Academy of Medicine in Ire...SerialSpringer-Verlag12680http://link.springer.com/journal/12680NaNNaN
-

4 rows × 28 columns

-
- - - - -```python -# ajout des infos de nlch : -# publication_title -nlch_for_merge = nlch[['issnl', 'publication_title']] -nlch_for_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnlpublication_title
01074-0708Journal of Agricultural and Applied Economics
12070-0733Advances in Applied Mathematics and Mechanics
21748-4995Annals of Actuarial Science
32040-4700Advances in Animal Biosciences
40261-3409Archaeologia
.........
27531364-6745neurogenetics
27540943-3481uwf UmweltWirtschaftsForum | Sustainability Ma...
27550945-358XÖsterreichische Wasser- und Abfallwirtschaft
27561011-0070Österreichische Zeitschrift für Soziologie
27571598-5865Journal Applied Mathematics Computing
-

2758 rows × 2 columns

-
- - - - -```python -# renommer les colonnes -nlch_for_merge = nlch_for_merge.rename(columns={'publication_title' : 'nlch_title'}) -nlch_for_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnlnlch_title
01074-0708Journal of Agricultural and Applied Economics
12070-0733Advances in Applied Mathematics and Mechanics
21748-4995Annals of Actuarial Science
32040-4700Advances in Animal Biosciences
40261-3409Archaeologia
.........
27531364-6745neurogenetics
27540943-3481uwf UmweltWirtschaftsForum | Sustainability Ma...
27550945-358XÖsterreichische Wasser- und Abfallwirtschaft
27561011-0070Österreichische Zeitschrift für Soziologie
27571598-5865Journal Applied Mathematics Computing
-

2758 rows × 2 columns

-
- - - - -```python -# merge avec journals -journals = pd.merge(journals, nlch_for_merge, on='issnl', how='left') -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_titlelockssportico_statusporticonlch_title
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN0.0NaN0.0NaN
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1Yes1.0PLoS One1.0NaN0.0NaN
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN0.0NaN0.0NaN
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN0.0NaN0.0preserved1.0NaN
............................................................
9939970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234NaNNaNNaN0.0NaN0.0preserved1.0NaN
9949980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN
9959991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields
99610000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN0.0preserved1.0NaN
99710010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN0.0NaN0.0NaN
-

998 rows × 19 columns

-
- - - - -```python -# ajouter info sur la presence sur portico -journals.loc[journals['nlch_title'].isna(), 'nlch'] = 0 -journals.loc[~journals['nlch_title'].isna(), 'nlch'] = 1 -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_titlelockssportico_statusporticonlch_titlenlch
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1Yes1.0PLoS One1.0NaN0.0NaN0.0
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0
...............................................................
9939970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0
9949980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0
9959991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0
99610000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0
99710010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0
-

998 rows × 20 columns

-
- - - -### QOAM - - -```python -# ouverture du fichier -qoam = pd.read_csv('qoam/qoam_not_zero.tsv', encoding='utf-8', header=0, sep='\t') -qoam -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnqoam_av_score
02254-58835.0
12279-72545.0
22317-30765.0
32525-34685.0
41339-84745.0
.........
30182083-48101.0
30191759-22081.0
30200219-98741.0
30212083-61391.0
30222312-27571.0
-

3023 rows × 2 columns

-
- - - - -```python -qoam = pd.merge(qoam, df_issnl, on='issn', how='left') -qoam -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnqoam_av_scoreissnl
02254-58835.02254-5883
12279-72545.02279-7254
22317-30765.02317-3076
32525-34685.02525-3468
41339-84745.01339-8474
............
30182083-48101.02083-4810
30191759-22081.01759-2208
30200219-98741.00219-9874
30212083-61391.02083-6139
30222312-27571.02312-2757
-

3023 rows × 3 columns

-
- - - - -```python -# test des lignes sans merge -qoam.loc[qoam['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnqoam_av_scoreissnl
242163-11824.50NaN
732292-13544.00NaN
772571-51354.00NaN
902201-568X4.00NaN
3021687-921X3.50NaN
4052391-54123.25NaN
4382668-05723.25NaN
8012391-54203.00NaN
8032391-54473.00NaN
8142391-54553.00NaN
8152391-54713.00NaN
11002516-31592.75NaN
12162289-56392.50NaN
12282211-38352.50NaN
15061658-35582.25NaN
15502214-62962.25NaN
19601687-52572.00NaN
19751687-56992.00NaN
21402056-33152.00NaN
21502083-36362.00NaN
21892366-00581.75NaN
21982450-69661.75NaN
22541308-69791.75NaN
22671035-76801.75NaN
23002411-96601.75NaN
26112198-26271.25NaN
28042180-27261.00NaN
29792146-05741.00NaN
-
- - - - -```python -# utiliser l'ISSN à la place sur ces lignes -qoam.loc[qoam['issnl'].isna(), 'issnl'] = qoam['issn'] -``` - - -```python -# test des lignes sans merge -qoam.loc[qoam['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - -
issnqoam_av_scoreissnl
-
- - - - -```python -# ajout des infos de qoam : -# publication_title -qoam_for_merge = qoam[['issnl', 'qoam_av_score']] -qoam_for_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnlqoam_av_score
02254-58835.0
12279-72545.0
22317-30765.0
32525-34685.0
41339-84745.0
.........
30182083-48101.0
30191759-22081.0
30200219-98741.0
30212083-61391.0
30222312-27571.0
-

3023 rows × 2 columns

-
- - - - -```python -# merge avec journals -journals = pd.merge(journals, qoam_for_merge, on='issnl', how='left') -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountry...doaj_sealAPCdoaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_score
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215...NaNNaN0.0NaN0.0NaN0.0NaN0.0NaN
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236...NaNNaN0.0NaN0.0preserved1.0NaN0.0NaN
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236...1Yes1.0PLoS One1.0NaN0.0NaN0.04.035714
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209...NaNNaN0.0NaN0.0NaN0.0NaN0.0NaN
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236...NaNNaN0.0NaN0.0preserved1.0NaN0.0NaN
..................................................................
9959970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234...NaNNaN0.0NaN0.0preserved1.0NaN0.0NaN
9969980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236...NaNNaN0.0NaN0.0preserved1.0NaN0.0NaN
9979991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483...NaNNaN0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaN
99810000960-14810960-1481Renewable energy19919999NaNRenew. energy124234...NaNNaN0.0NaN0.0preserved1.0NaN0.0NaN
99910010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236...NaNNaN0.0NaN0.0NaN0.0NaN0.0NaN
-

1000 rows × 21 columns

-
- - - - -```python -# suppression des doublons -journals = journals.drop_duplicates(subset=['id']) -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountry...doaj_sealAPCdoaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_score
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215...NaNNaN0.0NaN0.0NaN0.0NaN0.0NaN
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236...NaNNaN0.0NaN0.0preserved1.0NaN0.0NaN
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236...1Yes1.0PLoS One1.0NaN0.0NaN0.04.035714
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209...NaNNaN0.0NaN0.0NaN0.0NaN0.0NaN
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236...NaNNaN0.0NaN0.0preserved1.0NaN0.0NaN
..................................................................
9959970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234...NaNNaN0.0NaN0.0preserved1.0NaN0.0NaN
9969980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236...NaNNaN0.0NaN0.0preserved1.0NaN0.0NaN
9979991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483...NaNNaN0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaN
99810000960-14810960-1481Renewable energy19919999NaNRenew. energy124234...NaNNaN0.0NaN0.0preserved1.0NaN0.0NaN
99910010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236...NaNNaN0.0NaN0.0NaN0.0NaN0.0NaN
-

996 rows × 21 columns

-
- - - -## Finalisation de la table journals - - -```python -# test des doublons -journals_doublons = journals[['issn', 'issnl', 'title']].loc[journals.duplicated(subset='issnl')].sort_values(by='issnl') -journals_doublons -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnltitle
921520-51260002-7863Journal of the American Chemical Society (Online)
3931520-68820003-2700Analytical chemistry (Online)
3101077-31180003-6951Applied physics letters (Online)
1671432-07460004-6361Astronomy & astrophysics (Online)
7931542-00860006-3495Biophysical journal (Online)
............
4262050-74962050-7496Journal of materials chemistry. A (Online)
9522050-75342050-7526Journal of materials chemistry. C (Online)
832469-99692469-9950Physical review. B. (Online)
2092470-00292470-0010Physical review. D. (Online)
8402470-00532470-0045Physical review. E (Online)
-

85 rows × 3 columns

-
- - - - -```python -journals_doublons = journals_doublons.loc[journals_doublons['issnl'].notna()] -``` - - -```python -# merge pour voir les lignes avec doublon -journals_doublons['doublon_issnl'] = 1 -journals = pd.merge(journals, journals_doublons[['issnl', 'doublon_issnl']], on='issnl', how='left') -journals.loc[journals['doublon_issnl'] == 1] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountry...APCdoaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_scoredoublon_issnl
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
560003-69510003-6951Applied physics letters19629999http://scitation.aip.org/aplo/Appl. phys. lett.124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
671029-84791029-8479The journal of high energy physics (Online)19979999http://link.springer.com/journal/13130J. high energy phys. (Online)12483...No1.0Journal of High Energy Physics1.0preserved1.0NaN0.0NaN1.0
780002-78630002-7863Journal of the American Chemical Society (Print)18799999http://pubs.acs.org/journals/jacsat/index.htmlJ. Am. Chem. Soc. (Print)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
..................................................................
9449501520-52071520-5207The journal of physical chemistry. B (1997 : O...19979999http://pubs.acs.org/journals/jpcbfk/index.htmlJ. phys. chem., B (1997 : Online)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
9469521361-65280957-4484Nanotechnology (Bristol. Online)19909999http://www.iop.org/Journals/naNanotechnology (Bristol, Online)124234...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
9479531469-76450022-1120Journal of fluid mechanics (Online)19569999http://firstsearch.oclc.orgJ. fluid mech. (Online)124234...NaN0.0NaN0.0preserved1.0Journal of Fluid Mechanics1.0NaN1.0
9489542050-75342050-7526Journal of materials chemistry. C (Online)20139999http://pubs.rsc.org/en/journals/journalissues/tc#J. mater. chem. C (Online)124234...NaN0.0Journal of Materials Chemistry C: Materials fo...1.0preserved1.0NaN0.0NaN1.0
9749801477-09701352-4585Multiple sclerosis (Online)19959999http://www.arnoldpublishers.com/journals/pages...Mult. scler. (Online)124234...NaN0.0Multiple Sclerosis Journal1.0preserved1.0NaN0.01.751.0
-

170 rows × 22 columns

-
- - - - -```python -journals.loc[journals['doublon_issnl'] == 1].sort_values(by='issnl') -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountry...APCdoaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_scoredoublon_issnl
780002-78630002-7863Journal of the American Chemical Society (Print)18799999http://pubs.acs.org/journals/jacsat/index.htmlJ. Am. Chem. Soc. (Print)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
92931520-51260002-7863Journal of the American Chemical Society (Online)18799999http://books.google.com/books?id=ExsEZbIZKjwCJ. Am. Chem. Soc. (Online)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
3933961520-68820003-2700Analytical chemistry (Online)19479999http://pubs.acs.org/journals/ancham/about.htmlAnal. chem. (Online)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
69700003-27000003-2700Analytical chemistry (Washington)19489999http://pubs.acs.org/journals/ancham/index.htmlAnal. chem. (Wash.)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
560003-69510003-6951Applied physics letters19629999http://scitation.aip.org/aplo/Appl. phys. lett.124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
..................................................................
40412469-99502469-9950Physical review. B20169999http://journals.aps.org/prbPhys. rev. B.124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
79802470-00102470-0010Physical review. D20169999http://journals.aps.org/prdPhys. rev. D.124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
2092102470-00292470-0010Physical review. D. (Online)20169999http://journals.aps.org/prdPhys. rev. D. (Online)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
5305332470-00452470-0045Physical review. E (Print)20169999http://journals.aps.org/prePhys. rev., E (Print)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
8368422470-00532470-0045Physical review. E (Online)20169999http://journals.aps.org/prePhys. rev., E (Online)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
-

170 rows × 22 columns

-
- - - - -```python -# export csv des doublons -journals.loc[journals['doublon_issnl'] == 1].sort_values(by='issnl').to_csv('sample/journals_duplicates.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel des doublons -journals.loc[journals['doublon_issnl'] == 1].sort_values(by='issnl').to_excel('sample/journals_duplicates.xlsx', index=False) -``` - - -```python -# suppression des doublons -journals = journals.drop_duplicates(subset=['issnl']) -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountry...APCdoaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_scoredoublon_issnl
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215...NaN0.0NaN0.0NaN0.0NaN0.0NaNNaN
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236...Yes1.0PLoS One1.0NaN0.0NaN0.04.035714NaN
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209...NaN0.0NaN0.0NaN0.0NaN0.0NaNNaN
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236...NaN0.0NaN0.0preserved1.0NaN0.0NaN1.0
..................................................................
9919970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234...NaN0.0NaN0.0preserved1.0NaN0.0NaNNaN
9929980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236...NaN0.0NaN0.0preserved1.0NaN0.0NaNNaN
9939991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483...NaN0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaNNaN
99410000960-14810960-1481Renewable energy19919999NaNRenew. energy124234...NaN0.0NaN0.0preserved1.0NaN0.0NaNNaN
99510010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236...NaN0.0NaN0.0NaN0.0NaN0.0NaNNaN
-

911 rows × 22 columns

-
- - - - -```python -# ajout du oa_status -# 6 : Diamond -# 5 : Gold -# 4 : Full -# 3 : Hybrid -# 2 : Green -# 1 : UNKNOWN -journals['oa_status'] = 1 -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountry...doaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_scoredoublon_issnloa_status
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215...0.0NaN0.0NaN0.0NaN0.0NaNNaN1
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236...0.0NaN0.0preserved1.0NaN0.0NaN1.01
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236...1.0PLoS One1.0NaN0.0NaN0.04.035714NaN1
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209...0.0NaN0.0NaN0.0NaN0.0NaNNaN1
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236...0.0NaN0.0preserved1.0NaN0.0NaN1.01
..................................................................
9919970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234...0.0NaN0.0preserved1.0NaN0.0NaNNaN1
9929980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236...0.0NaN0.0preserved1.0NaN0.0NaNNaN1
9939991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483...0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaNNaN1
99410000960-14810960-1481Renewable energy19919999NaNRenew. energy124234...0.0NaN0.0preserved1.0NaN0.0NaNNaN1
99510010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236...0.0NaN0.0NaN0.0NaN0.0NaNNaN1
-

911 rows × 23 columns

-
- - - - -```python -# status 5 pour les revues DOAJ -journals.loc[journals['doaj_status'] == 1, 'oa_status'] = 5 -# status 6 pour les revues DOAJ avec APC = 0 -journals.loc[(journals['doaj_status'] == 1) & (journals['APC'] == 'No'), 'oa_status'] = 6 -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountry...doaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_scoredoublon_issnloa_status
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215...0.0NaN0.0NaN0.0NaN0.0NaNNaN1
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236...0.0NaN0.0preserved1.0NaN0.0NaN1.01
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236...1.0PLoS One1.0NaN0.0NaN0.04.035714NaN5
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209...0.0NaN0.0NaN0.0NaN0.0NaNNaN1
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236...0.0NaN0.0preserved1.0NaN0.0NaN1.01
..................................................................
9919970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234...0.0NaN0.0preserved1.0NaN0.0NaNNaN1
9929980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236...0.0NaN0.0preserved1.0NaN0.0NaNNaN1
9939991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483...0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaNNaN1
99410000960-14810960-1481Renewable energy19919999NaNRenew. energy124234...0.0NaN0.0preserved1.0NaN0.0NaNNaN1
99510010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236...0.0NaN0.0NaN0.0NaN0.0NaNNaN1
-

911 rows × 23 columns

-
- - - - -```python -journals['oa_status'].value_counts() -``` - - - - - 1 824 - 5 70 - 6 17 - Name: oa_status, dtype: int64 - - - - -```python -# export csv brut -journals.to_csv('sample/journals_brut.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel brut -journals.to_excel('sample/journals_brut.xlsx', index=False) -``` - - -```python -# export csv des ids -journals[['id', 'title', 'issn', 'issnl']].to_csv('sample/journals_ids.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel des ids -journals[['id', 'title', 'issn', 'issnl']].to_excel('sample/journals_ids.xlsx', index=False) -``` - - -```python - -``` diff --git a/import_scripts/03_oacct_journals.py b/import_scripts/03_oacct_journals.py deleted file mode 100644 index 3b63a7b5..00000000 --- a/import_scripts/03_oacct_journals.py +++ /dev/null @@ -1,1062 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. 
-# -# Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -# Date de dernière mise à jour : 16.07.2021 - -# ## Extraction des données des revues -# -# -# ## Corpus initial -# -# ISSNs des revues des publication archivées sur l'AoU UNIGE et sur Infoscience EPFL -# -# * Fichier des ISSNs de l'AoU exporté le 16.10.2020 -# * Fichier des ISSNs de Infoscience exporté le 28.01.2021 -# * Données extraits à partir du JSON de ISSN.org -# - -# In[1]: - - -import pandas as pd -import csv -import json -import numpy as np -import os -# paramètre pour le nombre de journaux dans le sample (0 pour prendre tout) -journals_sample_n = 1000 - - -# ## Table OA categories -# -# * 1 : UNKNOWN -# * 2 : Green -# * 3 : Hybrid -# * 4 : Full -# * 5 : Gold -# * 6 : Diamond - -# In[2]: - - -# creation du DF -col_names = ['id', - 'status', - 'description', - 'subscription', - 'accepted_manuscript', - 'apc', - 'final_version' - ] -oas = pd.DataFrame(columns = col_names) -oas - - -# In[3]: - - -# ajout des valeurs -oas = oas.append({'id' : 1, 'status' : 'UNKNOWN', 'description' : '', 'subscription' : 0, 'accepted_manuscript' : 0, 'apc' : 0, 'final_version' : 0}, ignore_index=True) -oas = oas.append({'id' : 2, 'status' : 'Green', 'description' : 'Paywalled access journal, usually allows the archive of submitted or accepted version on institutional repositories (embargo periods may apply)', 'subscription' : 1, 'accepted_manuscript' : 1, 'apc' : 0, 'final_version' : 0}, ignore_index=True) -oas = oas.append({'id' : 3, 'status' : 'hybrid', 'description' : 'Paywalled access journal, offers several Open Access upon payment of APCs. 
It allows offten the archive of published version on institutional repositories (embargo periods can apply)', 'subscription' : 1, 'accepted_manuscript' : 1, 'apc' : 1, 'final_version' : 1}, ignore_index=True) -# oas = oas.append({'id' : 4, 'status' : 'Full', 'description' : 'No subscription, Green or Gold', 'subscription' : 0, 'accepted_manuscript' : 1, 'apc' : 0, 'final_version' : 1}, ignore_index=True) -oas = oas.append({'id' : 5, 'status' : 'Gold', 'description' : 'Open Access journal (payment of APCs may apply). It allows offten the archive of published version on institutional repositories (embargo periods can apply)', 'subscription' : 0, 'accepted_manuscript' : 1, 'apc' : 1, 'final_version' : 1}, ignore_index=True) -oas = oas.append({'id' : 6, 'status' : 'Diamond', 'description' : 'Open Access journal (without payment of APCs). It allows offten the archive of published version on institutional repositories (embargo periods can apply)', 'subscription' : 0, 'accepted_manuscript' : 1, 'apc' : 0, 'final_version' : 1}, ignore_index=True) - - -# In[4]: - - -oas - - -# In[5]: - - -# esport JSON -result = oas.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/oa.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[6]: - - -# export csv -oas.to_csv('sample/oa.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[7]: - - -# export excel -oas.to_excel('sample/oa.xlsx', index=False) - - -# ## Table Journals - -# In[8]: - - -issns = pd.read_csv('issn/issns_count.tsv', encoding='utf-8', header=0, sep='\t') -issns - - -# In[9]: - - -# ajout des colonnes -issns.insert(0, 'id', '', False) -issns - - -# In[10]: - - -# convertir l'index en id -issns = issns.reset_index() -issns - - -# In[11]: - - -# ajout de l'id avec l'index + 1 -issns['id'] = issns['index'] + 1 -del issns['index'] -issns - - -# In[12]: - - -# reduction à X journaux pour l'échantillon de test -if journals_sample_n 
> 0 : - issns = issns.loc[:journals_sample_n] -issns - - -# In[13]: - - -# ajout des ISSN-L -df_issnl = pd.read_csv('issn/20171102.ISSN-to-ISSN-L.txt', encoding='utf-8', header=0, sep='\t') -df_issnl - - -# In[14]: - - -# renommer les colonnes -df_issnl = df_issnl.rename(columns={'ISSN' : 'issn', 'ISSN-L' : 'issnl'}) - - -# In[15]: - - -issns = pd.merge(issns, df_issnl, on='issn', how='left') -issns - - -# In[16]: - - -# creation du DF -# 'oa_status' supprimé pour le moment -col_names = ['id', - 'issn', - 'issnl', - 'title', - 'starting_year', - 'end_year', - 'url', - 'name_short_iso_4' - ] -journals = pd.DataFrame(columns = col_names) -journals - - -# In[17]: - - -# creation du DF -col_names = ['id', 'iso_code'] -journals_languages = pd.DataFrame(columns = col_names) -journals_languages - - -# In[18]: - - -# creation du DF -# 'oa_status' supprimé -col_names = ['id', 'iso_code'] -journals_countries = pd.DataFrame(columns = col_names) -journals_countries - - -# In[19]: - - -# extraction des informations à partir des données ISSN.org -for index, row in issns.iterrows(): - myid = row['id'] - myissn = row['issn'] - if (((index/10) - int(index/10)) == 0) : - print(index) - # initialisation des variables à extraire - issnl = np.nan - title = '' - keytitle = '' - starting_year = np.nan - end_year = np.nan - myurl = np.nan - journal_country = np.nan - journal_language = np.nan - keytitle_abbr = np.nan - # export en json - if os.path.exists('issn/data/' + myissn + '.json'): - with open('issn/data/' + myissn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - for x in data['@graph']: - if ('@id' in x): - if (x['@id'] == 'resource/ISSN/' + myissn): - if ('mainTitle' in x): - title = x['mainTitle'] - else : - if ('name' in x): - title = x['name'] - # print(myissn) - if ('startDate' in x): - starting_year = x['startDate'] - if ('endDate' in x): - end_year = x['endDate'] - if ('url' in x): - urls = x['url'] - if type(urls) is list: - for url in urls: - # Filtrer les 
URLs des archives : - # www.ncbi.nlm.nih.gov/pmc/* - # www.pubmedcentral.gov/* - # pubmedcentral.nih.gov/* - # bibpurl.oclc.org/* - # www.jstor.org/* - # ieeexplore.ieee.org - # ovidsp.ovid.com - # et garder le premier des restants - myurl = url - if ('ncbi.nlm.nih.gov' not in url - and 'pubmedcentral' not in url - and 'bibpurl.oclc.org' not in url - and 'jstor.org' not in url - and 'ieeexplore.ieee.org' not in url - and 'ovidsp.ovid.com' not in url): - break - else : - myurl = x['url'] - if ('spatial' in x): - countries = x['spatial'] - if type(countries) is list: - for country in countries: - if ('https://www.iso.org/obp/ui/#iso:code:3166:' in country): - journal_country = country[-2:] - journals_countries = journals_countries.append({'id' : myid, 'iso_code' : journal_country}, ignore_index=True) - else : - if ('https://www.iso.org/obp/ui/#iso:code:3166:' in countries): - journal_country = countries[-2:] - journals_countries = journals_countries.append({'id' : myid, 'iso_code' : journal_country}, ignore_index=True) - # langue "inLanguage": "http://id.loc.gov/vocabulary/iso639-2/eng", - if ('inLanguage' in x): - languages = x['inLanguage'] - if type(languages) is list: - for language in languages: - journal_language = language[-3:] - journals_languages = journals_languages.append({'id' : myid, 'iso_code' : journal_language}, ignore_index=True) - else : - journal_language = languages[-3:] - journals_languages = journals_languages.append({'id' : myid, 'iso_code' : journal_language}, ignore_index=True) - if (x['@id'] == 'resource/ISSN/' + myissn + '#KeyTitle'): - if ('value' in x): - keytitle = x['value'] - if (x['@id'] == 'resource/ISSN/' + myissn + '#ISSN-L'): - if ('value' in x): - issnl = x['value'] - # "@id": "resource/ISSN/1098-0121#AbbreviatedKeyTitle", - if (x['@id'] == 'resource/ISSN/' + myissn + '#AbbreviatedKeyTitle'): - if ('value' in x): - mykeytitle_abbrs = x['value'] - if type(mykeytitle_abbrs) is list: - for mykeytitle_abbr in mykeytitle_abbrs: - 
print(myissn + ' - AbbreviatedKeyTitle is a list ' + mykeytitle_abbr) - keytitle_abbr = mykeytitle_abbr - with open('sample/03_journals_issn_multiple_titles.txt', 'a', encoding='utf-8') as g: - g.write(myissn + ' AbbreviatedKeyTitle is a list ' + mykeytitle_abbr + '\n') - break - else : - keytitle_abbr = mykeytitle_abbrs - if keytitle != '' : - title = keytitle - if title != '' : - # supprimer le point à la fin - if (title[-1] == '.'): - title = title[0:-1] - # remplacer les caractères spéciaux ˜The œ - if type(title) is list: - for mytitlei in title: - print(myissn + ' - title is a list ' + mytitlei) - title = str.replace(mytitlei, '˜The œ', 'The ') - with open('sample/03_journals_issn_multiple_titles.txt', 'a', encoding='utf-8') as g: - g.write(myissn + ' title is a list ' + mytitlei + '\n') - break - else : - title = str.replace(title, '˜The œ', 'The ') - else : - print(row['issn'] + ' - not found') - with open('sample/03_journals_issn_errors.txt', 'a', encoding='utf-8') as g: - g.write(row['issn'] + ' not found \n') - journals.at[index,'id'] = myid - journals.at[index,'title'] = title - journals.at[index,'issn'] = myissn - journals.at[index,'issnl'] = issnl - journals.at[index,'starting_year'] = starting_year - journals.at[index,'end_year'] = end_year - journals.at[index,'url'] = myurl - journals.at[index,'name_short_iso_4'] = keytitle_abbr - - -# In[20]: - - -journals - - -# In[21]: - - -# titres vides -journals.loc[journals['title'] == ''] - - -# In[22]: - - -# export csv des titres vides -journals.loc[journals['title'] == ''].to_csv('sample/journals_sans_titre.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[23]: - - -# export excel des ids -journals.loc[journals['title'] == ''].to_excel('sample/journals_sans_titre.xlsx', index=False) - - -# In[24]: - - -# garder les lignes avec titre -journals = journals.loc[journals['title'] != ''] -journals - - -# In[25]: - - -journals.shape[0] - - -# ## Languages - -# In[26]: - - -journals_languages - - -# 
In[27]: - - -# ouvrir la table des langues -languages = pd.read_csv('sample/language.tsv', encoding='utf-8', header=0, sep='\t') -languages - - -# In[28]: - - -# renommer les colonnes -del languages['name'] -languages = languages.rename(columns={'id' : 'language'}) - - -# In[29]: - - -# merge avec languages -journals_languages = pd.merge(journals_languages, languages, on='iso_code', how='left') -journals_languages - - -# In[30]: - - -# concat valeurs avec même id -journals_languages['language'] = journals_languages['language'].astype(str) -journals_languages = journals_languages.groupby('id').agg({'language': lambda x: ', '.join(x)}) -journals_languages - - -# In[31]: - - -# recuperation de l'id des langues -journals = pd.merge(journals, journals_languages, on='id', how='left') -journals - - -# ## Countries - -# In[32]: - - -journals_countries - - -# In[33]: - - -# ouvrir la table des pays -country = pd.read_csv('sample/country.tsv', encoding='utf-8', header=0, sep='\t') -country - - -# In[34]: - - -# renommer les colonnes -del country['name'] -country = country.rename(columns={'id' : 'country'}) - - -# In[35]: - - -# merge avec countries -journals_countries = pd.merge(journals_countries, country, on='iso_code', how='left') -journals_countries - - -# In[36]: - - -# concat valeurs avec même id -journals_countries['country'] = journals_countries['country'].astype(str) -journals_countries = journals_countries.groupby('id').agg({'country': lambda x: ', '.join(x)}) -journals_countries - - -# In[37]: - - -# recuperation de l'id des langues -journals = pd.merge(journals, journals_countries, on='id', how='left') -journals - - -# ### DOAJ - -# In[38]: - - -# ajout de DOAJ info -doaj = pd.read_csv('doaj/journalcsv__doaj_20210312_0636_utf8.csv', encoding='utf-8', header=0) -doaj - - -# In[39]: - - -# ajout ISSNL -doaj['issn'] = doaj['Journal ISSN (print version)'] -doaj.loc[doaj['issn'].isna(), 'issn'] = doaj['Journal EISSN (online version)'] -doaj - - -# In[40]: - - -doaj = 
pd.merge(doaj, df_issnl, on='issn', how='left') -doaj - - -# In[41]: - - -doaj.columns - - -# In[42]: - - -doaj['Preservation Services'] - - -# In[43]: - - -doaj['DOAJ Seal'] - - -# In[44]: - - -doaj['issnl'] - - -# In[45]: - - -doaj['APC'].value_counts() - - -# In[46]: - - -# ajout des infos de DOAJ : -# Journal title -# DOAJ Seal -doaj_for_merge = doaj[['issnl', 'Journal title', 'DOAJ Seal', 'APC']] -doaj_for_merge - - -# In[47]: - - -# renommer les colonnes -doaj_for_merge = doaj_for_merge.rename(columns={'Journal title' : 'doaj_title', 'DOAJ Seal' : 'doaj_seal'}) -doaj_for_merge - - -# In[48]: - - -# merge avec journals -journals = pd.merge(journals, doaj_for_merge, on='issnl', how='left') -journals - - -# In[49]: - - -# ajouter info sur la presence sur DOAJ ou du seal -journals.loc[journals['doaj_title'].isna(), 'doaj_status'] = 0 -journals.loc[~journals['doaj_title'].isna(), 'doaj_status'] = 1 -journals.loc[journals['doaj_seal'] == 'Yes', 'doaj_seal'] = 1 -journals.loc[journals['doaj_seal'] == 'No', 'doaj_seal'] = 0 -journals - - -# ### LOCKSS - -# In[50]: - - -# ajout des infos de preservation LOCKSS, Portico et Licences Nationales -lockss = pd.read_csv('lockss/keepers-LOCKSS-report.csv', encoding='utf-8', header=0, skiprows=1) -lockss - - -# In[51]: - - -# ajout ISSNL -lockss['issn'] = lockss['eISSN'] -lockss.loc[lockss['eISSN'].isna(), 'issn'] = lockss['ISSN'] -lockss - - -# In[52]: - - -lockss = pd.merge(lockss, df_issnl, on='issn', how='left') -lockss - - -# In[53]: - - -lockss.columns - - -# In[54]: - - -# test des lignes sans merge -lockss.loc[lockss['issnl'].isna()] - - -# In[55]: - - -# utiliser l'ISSN à la place sur ces lignes -lockss.loc[lockss['issnl'].isna(), 'issnl'] = lockss['issn'] - - -# In[56]: - - -# test des lignes sans merge -lockss.loc[lockss['issnl'].isna()] - - -# In[57]: - - -# ajout des infos de LOCKSS : -# Title -lockss_for_merge = lockss[['issnl', 'Title']] -lockss_for_merge - - -# In[58]: - - -# renommer les colonnes 
-lockss_for_merge = lockss_for_merge.rename(columns={'Title' : 'lockss_title'}) -lockss_for_merge - - -# In[59]: - - -# merge avec journals -journals = pd.merge(journals, lockss_for_merge, on='issnl', how='left') -journals - - -# In[60]: - - -# suppression des doublons -journals = journals.drop_duplicates(subset=['id']) -journals - - -# In[61]: - - -# ajouter info sur la presence sur LOCKSS -journals.loc[journals['lockss_title'].isna(), 'lockss'] = 0 -journals.loc[~journals['lockss_title'].isna(), 'lockss'] = 1 -journals - - -# ### Portico - -# In[62]: - - -# ajout des infos de preservation Portico -portico = pd.read_excel('portico/e-journals.xlsx', sheet_name='Details', skiprows=2) -portico - - -# In[63]: - - -# ajout ISSNL -portico['issn'] = portico['e-ISSN'] -portico.loc[portico['e-ISSN'].isna(), 'issn'] = portico['Print ISSN'] -portico - - -# In[64]: - - -portico = pd.merge(portico, df_issnl, on='issn', how='left') -portico - - -# In[65]: - - -portico.columns - - -# In[66]: - - -# test des lignes sans merge -portico.loc[portico['issnl'].isna()] - - -# In[67]: - - -# utiliser l'ISSN à la place sur ces lignes -portico.loc[portico['issnl'].isna(), 'issnl'] = portico['issn'] - - -# In[68]: - - -# test des lignes sans merge -portico.loc[portico['issnl'].isna()] - - -# In[69]: - - -# ajout des infos de Portico : -# Status -portico_for_merge = portico[['issnl', 'Status']] -portico_for_merge - - -# In[70]: - - -# garder les lignes "preserved" -portico_for_merge = portico_for_merge.loc[portico_for_merge['Status'] == 'preserved'] -portico_for_merge - - -# In[71]: - - -# renommer les colonnes -portico_for_merge = portico_for_merge.rename(columns={'Status' : 'portico_status'}) -portico_for_merge - - -# In[72]: - - -# merge avec journals -journals = pd.merge(journals, portico_for_merge, on='issnl', how='left') -journals - - -# In[73]: - - -# suppression des doublons -journals = journals.drop_duplicates(subset=['id']) -journals - - -# In[74]: - - -# ajouter info sur la 
presence sur portico -journals.loc[journals['portico_status'].isna(), 'portico'] = 0 -journals.loc[~journals['portico_status'].isna(), 'portico'] = 1 -journals - - -# ### Licences Nationales - -# In[75]: - - -# ajout des infos de preservation des Licences nationales -nlch1 = pd.read_excel('licences_nationales/cambridge_Switzerland_NationalLicences_2020-08-17.xlsx') -nlch1 - - -# In[76]: - - -# ajout des infos de preservation des Licences nationales -nlch2 = pd.read_excel('licences_nationales/gruyter_Switzerland_NationalLicences_2020-11-30.xlsx') -nlch2 - - -# In[77]: - - -# ajout des infos de preservation des Licences nationales -nlch3 = pd.read_excel('licences_nationales/oxford_Switzerland_NationalLicences_2020-09-24.xlsx') -nlch3 - - -# In[78]: - - -# ajout des infos de preservation des Licences nationales -nlch4 = pd.read_excel('licences_nationales/springer_Switzerland_NationalLicences_2020-08-12.xlsx') -nlch4 - - -# In[79]: - - -# concatener les 4 -nlch = pd.concat([nlch1, nlch2, nlch3, nlch4], ignore_index=True) -nlch - - -# In[80]: - - -nlch.columns - - -# In[81]: - - -# ajout ISSNL -nlch['issn'] = nlch['online_identifier'] -nlch.loc[nlch['online_identifier'].isna(), 'issn'] = nlch['print_identifier'] -nlch - - -# In[82]: - - -nlch = pd.merge(nlch, df_issnl, on='issn', how='left') -nlch - - -# In[83]: - - -# test des lignes sans merge -nlch.loc[nlch['issnl'].isna()] - - -# In[84]: - - -# utiliser l'ISSN à la place sur ces lignes -nlch.loc[nlch['issnl'].isna(), 'issnl'] = nlch['issn'] - - -# In[85]: - - -# test des lignes sans merge -nlch.loc[nlch['issnl'].isna()] - - -# In[86]: - - -# ajout des infos de nlch : -# publication_title -nlch_for_merge = nlch[['issnl', 'publication_title']] -nlch_for_merge - - -# In[87]: - - -# renommer les colonnes -nlch_for_merge = nlch_for_merge.rename(columns={'publication_title' : 'nlch_title'}) -nlch_for_merge - - -# In[88]: - - -# merge avec journals -journals = pd.merge(journals, nlch_for_merge, on='issnl', how='left') 
-journals - - -# In[89]: - - -# ajouter info sur la presence sur portico -journals.loc[journals['nlch_title'].isna(), 'nlch'] = 0 -journals.loc[~journals['nlch_title'].isna(), 'nlch'] = 1 -journals - - -# ### QOAM - -# In[90]: - - -# ouverture du fichier -qoam = pd.read_csv('qoam/qoam_not_zero.tsv', encoding='utf-8', header=0, sep='\t') -qoam - - -# In[91]: - - -qoam = pd.merge(qoam, df_issnl, on='issn', how='left') -qoam - - -# In[92]: - - -# test des lignes sans merge -qoam.loc[qoam['issnl'].isna()] - - -# In[93]: - - -# utiliser l'ISSN à la place sur ces lignes -qoam.loc[qoam['issnl'].isna(), 'issnl'] = qoam['issn'] - - -# In[94]: - - -# test des lignes sans merge -qoam.loc[qoam['issnl'].isna()] - - -# In[95]: - - -# ajout des infos de qoam : -# publication_title -qoam_for_merge = qoam[['issnl', 'qoam_av_score']] -qoam_for_merge - - -# In[96]: - - -# merge avec journals -journals = pd.merge(journals, qoam_for_merge, on='issnl', how='left') -journals - - -# In[97]: - - -# suppression des doublons -journals = journals.drop_duplicates(subset=['id']) -journals - - -# ## Finalisation de la table journals - -# In[98]: - - -# test des doublons -journals_doublons = journals[['issn', 'issnl', 'title']].loc[journals.duplicated(subset='issnl')].sort_values(by='issnl') -journals_doublons - - -# In[99]: - - -journals_doublons = journals_doublons.loc[journals_doublons['issnl'].notna()] - - -# In[100]: - - -# merge pour voir les lignes avec doublon -journals_doublons['doublon_issnl'] = 1 -journals = pd.merge(journals, journals_doublons[['issnl', 'doublon_issnl']], on='issnl', how='left') -journals.loc[journals['doublon_issnl'] == 1] - - -# In[101]: - - -journals.loc[journals['doublon_issnl'] == 1].sort_values(by='issnl') - - -# In[102]: - - -# export csv des doublons -journals.loc[journals['doublon_issnl'] == 1].sort_values(by='issnl').to_csv('sample/journals_duplicates.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[103]: - - -# export excel des doublons 
-journals.loc[journals['doublon_issnl'] == 1].sort_values(by='issnl').to_excel('sample/journals_duplicates.xlsx', index=False) - - -# In[104]: - - -# suppression des doublons -journals = journals.drop_duplicates(subset=['issnl']) -journals - - -# In[105]: - - -# ajout du oa_status -# 6 : Diamond -# 5 : Gold -# 4 : Full -# 3 : Hybrid -# 2 : Green -# 1 : UNKNOWN -journals['oa_status'] = 1 -journals - - -# In[106]: - - -# status 5 pour les revues DOAJ -journals.loc[journals['doaj_status'] == 1, 'oa_status'] = 5 -# status 6 pour les revues DOAJ avec APC = 0 -journals.loc[(journals['doaj_status'] == 1) & (journals['APC'] == 'No'), 'oa_status'] = 6 -journals - - -# In[107]: - - -journals['oa_status'].value_counts() - - -# In[108]: - - -# export csv brut -journals.to_csv('sample/journals_brut.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[109]: - - -# export excel brut -journals.to_excel('sample/journals_brut.xlsx', index=False) - - -# In[110]: - - -# export csv des ids -journals[['id', 'title', 'issn', 'issnl']].to_csv('sample/journals_ids.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[111]: - - -# export excel des ids -journals[['id', 'title', 'issn', 'issnl']].to_excel('sample/journals_ids.xlsx', index=False) - - -# In[ ]: - - - - diff --git a/import_scripts/04_oacct_publishers.md b/import_scripts/04_oacct_publishers.md deleted file mode 100644 index c855a57c..00000000 --- a/import_scripts/04_oacct_publishers.md +++ /dev/null @@ -1,2826 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. 
- -Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -Date de dernière mise à jour : 16.07.2021 - -## Extraction des données des éditeurs - -Sources : -1. Données de ISSN.org (JSON) - -### Format des données source - -* Noeud : "@graph" -* spatial & publisher : - * "@id": "resource/ISSN/0140-6736", - * "spatial": [ - "http://id.loc.gov/vocabulary/countries/ne", - "https://www.iso.org/obp/ui/#iso:code:3166:NL" - ], - -Exemple avec plusieurs éditeurs dans le temps : - - "publisher": [ - "resource/ISSN/0140-6736#Publisher-Elsevier", - "resource/ISSN/0140-6736#Publisher-J._Onwhyn" - ], - - { - "@id": "resource/ISSN/0140-6736#LatestPublicationEvent", - "@type": "http://schema.org/PublicationEvent", - "publishedBy": "resource/ISSN/0140-6736#Publisher-Elsevier", - "location": "resource/ISSN/0140-6736#PublicationPlace-Amsterdam" - }, - - { - "@id": "resource/ISSN/0140-6736#Publisher-Elsevier", - "@type": "http://schema.org/Organization", - "name": "Elsevier" - }, - -Exemple avec un seul éditeur dans le temps : - - "publisher": "resource/ISSN/0899-8418#Publisher-Wiley", - - { - "@id": "resource/ISSN/0899-8418#EarliestPublicationEvent", - "@type": "http://schema.org/PublicationEvent", - "publishedBy": "resource/ISSN/0899-8418#Publisher-Wiley", - "temporal": "c1989-", - "location": [ - "resource/ISSN/0899-8418#PublicationPlace-New_York", - "resource/ISSN/0899-8418#PublicationPlace-Chichester" - ] - }, - - { - "@id": "resource/ISSN/0899-8418#Publisher-Wiley", - "@type": "http://schema.org/Organization", - "name": "Wiley" - }, - -Exemple avec une liste d'éditeurs finaux : - - { - "@id": "resource/ISSN/2174-8454#LatestPublicationEvent", - "@type": "http://schema.org/PublicationEvent", - "publishedBy": [ - "resource/ISSN/2174-8454#Publisher-The_Global_Studies_Institute_de_l’Université_de_Genève", - "resource/ISSN/2174-8454#Publisher-Universitat_de_València,_Departamento_de_Teoría_de_los_Lenguajes_y_Ciencias_de_la_Comunicación" - ], - "location": 
"resource/ISSN/2174-8454#PublicationPlace-Valencia" - }, - - -```python -import pandas as pd -import csv -import json -import numpy as np -import os -``` - -## Table Publishers - - -```python -# creation du DF -# 'country' supprimé pour l'ajouter aux journaux -# 'oa_status' supprimé pour le moment -col_names = ['id', - 'name', - 'publisher_id_issn', - ] -publisher_issn = pd.DataFrame(columns = col_names) -publisher_issn -``` - - - - -
- - - - - - - - - - - - -
idnamepublisher_id_issn
-
- - - -## Table Journals - - -```python -journal = pd.read_csv('sample/journals_brut.tsv', encoding='utf-8', header=0, sep='\t') -journal -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountry...doaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_scoredoublon_issnloa_status
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215...0.0NaN0.0NaN0.0NaN0.0NaNNaN1
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236...0.0NaN0.0preserved1.0NaN0.0NaN1.01
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236...1.0PLoS One1.0NaN0.0NaN0.04.035714NaN5
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209...0.0NaN0.0NaN0.0NaN0.0NaNNaN1
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236...0.0NaN0.0preserved1.0NaN0.0NaN1.01
..................................................................
9069970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234...0.0NaN0.0preserved1.0NaN0.0NaNNaN1
9079980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236...0.0NaN0.0preserved1.0NaN0.0NaNNaN1
9089991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483...0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaNNaN1
90910000960-14810960-1481Renewable energy19919999NaNRenew. energy124234...0.0NaN0.0preserved1.0NaN0.0NaNNaN1
91010010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236...0.0NaN0.0NaN0.0NaN0.0NaNNaN1
-

911 rows × 23 columns

-
- - - -## Table Journals Publishers - - -```python -# creation du DF -col_names = ['journal', - 'publisher_id_issn' - ] -journal_publisher = pd.DataFrame(columns = col_names) -journal_publisher -``` - - - - -
- - - - - - - - - - - -
journalpublisher_id_issn
-
- - - - -```python -# extraction des informations à partir des données ISSN.org -for index, row in journal.iterrows(): - journal_id = row['id'] - journal_issn = row['issn'] - if (((index/10) - int(index/10)) == 0) : - print(index) - # initialisation des variables à extraire - publisher_name = '' - publisher_country = '' - publisher_id = '' - publisher_id_first = '' - publisher_id_last = '' - # export en json - if os.path.exists('issn/data/' + journal_issn + '.json'): - with open('issn/data/' + journal_issn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - for x in data['@graph']: - if ('@id' in x): - if (x['@id'] == 'resource/ISSN/' + journal_issn + '#LatestPublicationEvent'): - if ('publishedBy' in x): - publisher_id_last = x['publishedBy'] - elif (x['@id'] == 'resource/ISSN/' + journal_issn + '#EarliestPublicationEvent'): - if ('publishedBy' in x): - publisher_id_first = x['publishedBy'] - if (publisher_id_last != ''): - publisher_id = publisher_id_last - else : - publisher_id = publisher_id_first - if type(publisher_id) is list: - for pid in publisher_id: - if (pid != ''): - for x in data['@graph']: - if ('@id' in x): - if (x['@id'] == pid): - if ('name' in x): - publisher_name = x['name'] - publisher_issn = publisher_issn.append({'publisher_id_issn' : pid, 'name' : publisher_name}, ignore_index=True) - journal_publisher = journal_publisher.append({'journal' : journal_id, 'publisher_id_issn' : pid}, ignore_index=True) - else : - if (publisher_id != ''): - for x in data['@graph']: - if ('@id' in x): - if (x['@id'] == publisher_id): - if ('name' in x): - publisher_name = x['name'] - publisher_issn = publisher_issn.append({'publisher_id_issn' : publisher_id, 'name' : publisher_name}, ignore_index=True) - journal_publisher = journal_publisher.append({'journal' : journal_id, 'publisher_id_issn' : publisher_id}, ignore_index=True) - else : - print(row['issn'] + ' - pas trouvé') -``` - - 0 - 10 - 20 - 30 - 40 - 50 - 60 - 70 - 80 - 90 - 100 - 110 - 120 - 
130 - 140 - 150 - 160 - 170 - 180 - 190 - 200 - 210 - 220 - 230 - 240 - 250 - 260 - 270 - 280 - 290 - 300 - 310 - 320 - 330 - 340 - 350 - 360 - 370 - 380 - 390 - 400 - 410 - 420 - 430 - 440 - 450 - 460 - 470 - 480 - 490 - 500 - 510 - 520 - 530 - 540 - 550 - 560 - 570 - 580 - 590 - 600 - 610 - 620 - 630 - 640 - 650 - 660 - 670 - 680 - 690 - 700 - 710 - 720 - 730 - 740 - 750 - 760 - 770 - 780 - 790 - 800 - 810 - 820 - 830 - 840 - 850 - 860 - 870 - 880 - 890 - 900 - 910 - - - -```python -publisher_issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamepublisher_id_issn
0NaNRevue Médicale Suisseresource/ISSN/1660-9379#Publisher-Revue_Médica...
1NaNAmerican Physical Societyresource/ISSN/0031-9007#Publisher-American_Phy...
2NaNPublic Library of Scienceresource/ISSN/1932-6203#Publisher-Public_Libra...
3NaNThe Global Studies Institute de l’Université d...resource/ISSN/2174-8454#Publisher-The_Global_S...
4NaNUniversitat de València, Departamento de Teorí...resource/ISSN/2174-8454#Publisher-Universitat_...
............
940NaNIOP Publishingresource/ISSN/0964-1726#Publisher-IOP_Publishing
941NaNElsevier [etc.]resource/ISSN/0022-3468#Publisher-Elsevier_[etc.]
942NaNSpringerresource/ISSN/1432-2064#Publisher-Springer
943NaNPergamonresource/ISSN/0960-1481#Publisher-Pergamon
944NaNAmerican Physiological Societyresource/ISSN/0161-7567#Publisher-American_Phy...
-

945 rows × 3 columns

-
- - - - -```python -# simlification des IDs -publisher_issn[['publisher_id_racine', 'publisher_id_fin']] = publisher_issn['publisher_id_issn'].str.split('#Publisher-', n=1, expand=True) -publisher_issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamepublisher_id_issnpublisher_id_racinepublisher_id_fin
0NaNRevue Médicale Suisseresource/ISSN/1660-9379#Publisher-Revue_Médica...resource/ISSN/1660-9379Revue_Médicale_Suisse
1NaNAmerican Physical Societyresource/ISSN/0031-9007#Publisher-American_Phy...resource/ISSN/0031-9007American_Physical_Society
2NaNPublic Library of Scienceresource/ISSN/1932-6203#Publisher-Public_Libra...resource/ISSN/1932-6203Public_Library_of_Science
3NaNThe Global Studies Institute de l’Université d...resource/ISSN/2174-8454#Publisher-The_Global_S...resource/ISSN/2174-8454The_Global_Studies_Institute_de_l’Université_d...
4NaNUniversitat de València, Departamento de Teorí...resource/ISSN/2174-8454#Publisher-Universitat_...resource/ISSN/2174-8454Universitat_de_València,_Departamento_de_Teorí...
..................
940NaNIOP Publishingresource/ISSN/0964-1726#Publisher-IOP_Publishingresource/ISSN/0964-1726IOP_Publishing
941NaNElsevier [etc.]resource/ISSN/0022-3468#Publisher-Elsevier_[etc.]resource/ISSN/0022-3468Elsevier_[etc.]
942NaNSpringerresource/ISSN/1432-2064#Publisher-Springerresource/ISSN/1432-2064Springer
943NaNPergamonresource/ISSN/0960-1481#Publisher-Pergamonresource/ISSN/0960-1481Pergamon
944NaNAmerican Physiological Societyresource/ISSN/0161-7567#Publisher-American_Phy...resource/ISSN/0161-7567American_Physiological_Society
-

945 rows × 5 columns

-
- - - - -```python -# simplifications -del publisher_issn['publisher_id_issn'] -del publisher_issn['publisher_id_racine'] -del publisher_issn['id'] -publisher_issn = publisher_issn.rename(columns={'publisher_id_fin': 'publisher_id_issn'}) -publisher_issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namepublisher_id_issn
0Revue Médicale SuisseRevue_Médicale_Suisse
1American Physical SocietyAmerican_Physical_Society
2Public Library of SciencePublic_Library_of_Science
3The Global Studies Institute de l’Université d...The_Global_Studies_Institute_de_l’Université_d...
4Universitat de València, Departamento de Teorí...Universitat_de_València,_Departamento_de_Teorí...
.........
940IOP PublishingIOP_Publishing
941Elsevier [etc.]Elsevier_[etc.]
942SpringerSpringer
943PergamonPergamon
944American Physiological SocietyAmerican_Physiological_Society
-

945 rows × 2 columns

-
- - - - -```python -# supprimer les crochets et supprimer les doublons -# publisher['publisher_id'] = publisher['publisher_id'].str.replace('[', '') -# publisher['publisher_id'] = publisher['publisher_id'].str.replace(']', '') -# publisher['name'] = publisher['name'].str.replace('[', '') -# publisher['name'] = publisher['name'].str.replace(']', '') -publisher_issn = publisher_issn.drop_duplicates(subset=['publisher_id_issn']) -publisher_issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namepublisher_id_issn
0Revue Médicale SuisseRevue_Médicale_Suisse
1American Physical SocietyAmerican_Physical_Society
2Public Library of SciencePublic_Library_of_Science
3The Global Studies Institute de l’Université d...The_Global_Studies_Institute_de_l’Université_d...
4Universitat de València, Departamento de Teorí...Universitat_de_València,_Departamento_de_Teorí...
.........
929FisherFisher
930Tipografia La CommercialeTipografia_La_Commerciale
932Red.: Prof. Dr. F. Cavalli, Istituto oncologic...Red.:_Prof._Dr._F._Cavalli,_Istituto_oncologic...
934Excerpta MedicaExcerpta_Medica
937Generative Grammar Group of the Department of ...Generative_Grammar_Group_of_the_Department_of_...
-

380 rows × 2 columns

-
- - - - -```python -# test publishers sans nom -publisher_issn.loc[publisher_issn['name'] == ''] -``` - - - - -
- - - - - - - - - - - - - - - - -
namepublisher_id_issn
241None
-
- - - - -```python -journal_publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalpublisher_id_issn
01resource/ISSN/1660-9379#Publisher-Revue_Médica...
12resource/ISSN/0031-9007#Publisher-American_Phy...
23resource/ISSN/1932-6203#Publisher-Public_Libra...
34resource/ISSN/2174-8454#Publisher-The_Global_S...
44resource/ISSN/2174-8454#Publisher-Universitat_...
.........
940997resource/ISSN/0964-1726#Publisher-IOP_Publishing
941998resource/ISSN/0022-3468#Publisher-Elsevier_[etc.]
942999resource/ISSN/1432-2064#Publisher-Springer
9431000resource/ISSN/0960-1481#Publisher-Pergamon
9441001resource/ISSN/0161-7567#Publisher-American_Phy...
-

945 rows × 2 columns

-
- - - - -```python -# simlification des IDs -journal_publisher[['publisher_id_racine', 'publisher_id_fin']] = journal_publisher['publisher_id_issn'].str.split('#Publisher-', n=1, expand=True) -# simplifications -del journal_publisher['publisher_id_issn'] -del journal_publisher['publisher_id_racine'] -journal_publisher = journal_publisher.rename(columns={'publisher_id_fin': 'publisher_id_issn'}) -journal_publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalpublisher_id_issn
01Revue_Médicale_Suisse
12American_Physical_Society
23Public_Library_of_Science
34The_Global_Studies_Institute_de_l’Université_d...
44Universitat_de_València,_Departamento_de_Teorí...
.........
940997IOP_Publishing
941998Elsevier_[etc.]
942999Springer
9431000Pergamon
9441001American_Physiological_Society
-

945 rows × 2 columns

-
- - - - -```python -# merge avec journals -journal_publisher = pd.merge(journal_publisher, publisher_issn, on='publisher_id_issn', how='left') -journal_publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalpublisher_id_issnname
01Revue_Médicale_SuisseRevue Médicale Suisse
12American_Physical_SocietyAmerican Physical Society
23Public_Library_of_SciencePublic Library of Science
34The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...
44Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...
............
940997IOP_PublishingIOP Publishing
941998Elsevier_[etc.]Elsevier [etc.]
942999SpringerSpringer
9431000PergamonPergamon
9441001American_Physiological_SocietyAmerican Physiological Society
-

945 rows × 3 columns

-
- - - - -```python -journal_publisher = journal_publisher.rename(columns={'publisher_id_issn': 'publisher_id'}) -journal_publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalpublisher_idname
01Revue_Médicale_SuisseRevue Médicale Suisse
12American_Physical_SocietyAmerican Physical Society
23Public_Library_of_SciencePublic Library of Science
34The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...
44Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...
............
940997IOP_PublishingIOP Publishing
941998Elsevier_[etc.]Elsevier [etc.]
942999SpringerSpringer
9431000PergamonPergamon
9441001American_Physiological_SocietyAmerican Physiological Society
-

945 rows × 3 columns

-
- - - - -```python -publisher = journal_publisher[['publisher_id', 'name']] -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher_idname
0Revue_Médicale_SuisseRevue Médicale Suisse
1American_Physical_SocietyAmerican Physical Society
2Public_Library_of_SciencePublic Library of Science
3The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...
4Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...
.........
940IOP_PublishingIOP Publishing
941Elsevier_[etc.]Elsevier [etc.]
942SpringerSpringer
943PergamonPergamon
944American_Physiological_SocietyAmerican Physiological Society
-

945 rows × 2 columns

-
- - - - -```python -# supprimer les doublons -publisher = publisher.drop_duplicates(subset='publisher_id') -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher_idname
0Revue_Médicale_SuisseRevue Médicale Suisse
1American_Physical_SocietyAmerican Physical Society
2Public_Library_of_SciencePublic Library of Science
3The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...
4Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...
.........
929FisherFisher
930Tipografia_La_CommercialeTipografia La Commerciale
932Red.:_Prof._Dr._F._Cavalli,_Istituto_oncologic...Red.: Prof. Dr. F. Cavalli, Istituto oncologic...
934Excerpta_MedicaExcerpta Medica
937Generative_Grammar_Group_of_the_Department_of_...Generative Grammar Group of the Department of ...
-

380 rows × 2 columns

-
- - - - -```python -# convertir l'index en id -publisher = publisher.reset_index() -# ajout de l'id avec l'index + 1 -publisher['id'] = publisher['index'] + 1 -del publisher['index'] -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher_idnameid
0Revue_Médicale_SuisseRevue Médicale Suisse1
1American_Physical_SocietyAmerican Physical Society2
2Public_Library_of_SciencePublic Library of Science3
3The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...4
4Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...5
............
375FisherFisher930
376Tipografia_La_CommercialeTipografia La Commerciale931
377Red.:_Prof._Dr._F._Cavalli,_Istituto_oncologic...Red.: Prof. Dr. F. Cavalli, Istituto oncologic...933
378Excerpta_MedicaExcerpta Medica935
379Generative_Grammar_Group_of_the_Department_of_...Generative Grammar Group of the Department of ...938
-

380 rows × 3 columns

-
- - - - -```python -# convertir l'index en id -publisher = publisher.reset_index() -# ajout de l'id avec l'index + 1 -publisher['id'] = publisher['index'] + 1 -del publisher['index'] -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher_idnameid
0Revue_Médicale_SuisseRevue Médicale Suisse1
1American_Physical_SocietyAmerican Physical Society2
2Public_Library_of_SciencePublic Library of Science3
3The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...4
4Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...5
............
375FisherFisher376
376Tipografia_La_CommercialeTipografia La Commerciale377
377Red.:_Prof._Dr._F._Cavalli,_Istituto_oncologic...Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378
378Excerpta_MedicaExcerpta Medica379
379Generative_Grammar_Group_of_the_Department_of_...Generative Grammar Group of the Department of ...380
-

380 rows × 3 columns

-
- - - - -```python -# ajout de la valeur UNKNOWN -# 'country': 999999 -publisher = publisher.append({'id' : 999999, 'name' : 'UNKNOWN', 'publisher_id': '999999'}, ignore_index=True) -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher_idnameid
0Revue_Médicale_SuisseRevue Médicale Suisse1
1American_Physical_SocietyAmerican Physical Society2
2Public_Library_of_SciencePublic Library of Science3
3The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...4
4Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...5
............
376Tipografia_La_CommercialeTipografia La Commerciale377
377Red.:_Prof._Dr._F._Cavalli,_Istituto_oncologic...Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378
378Excerpta_MedicaExcerpta Medica379
379Generative_Grammar_Group_of_the_Department_of_...Generative Grammar Group of the Department of ...380
380999999UNKNOWN999999
-

381 rows × 3 columns

-
- - - - -```python -# recuperation de l'id du publisher -journal_publisher = pd.merge(journal_publisher, publisher[['publisher_id', 'id']], on='publisher_id', how='left') -journal_publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalpublisher_idnameid
01Revue_Médicale_SuisseRevue Médicale Suisse1
12American_Physical_SocietyAmerican Physical Society2
23Public_Library_of_SciencePublic Library of Science3
34The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...4
44Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...5
...............
940997IOP_PublishingIOP Publishing47
941998Elsevier_[etc.]Elsevier [etc.]75
942999SpringerSpringer8
9431000PergamonPergamon119
9441001American_Physiological_SocietyAmerican Physiological Society217
-

945 rows × 4 columns

-
- - - - -```python -journal_publisher = journal_publisher.rename(columns={'id': 'publisher'}) -journal_publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalpublisher_idnamepublisher
01Revue_Médicale_SuisseRevue Médicale Suisse1
12American_Physical_SocietyAmerican Physical Society2
23Public_Library_of_SciencePublic Library of Science3
34The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...4
44Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...5
...............
940997IOP_PublishingIOP Publishing47
941998Elsevier_[etc.]Elsevier [etc.]75
942999SpringerSpringer8
9431000PergamonPergamon119
9441001American_Physiological_SocietyAmerican Physiological Society217
-

945 rows × 4 columns

-
- - - - -```python -# ajout du publisher id au journals_brut -journal_publisher_ids = journal_publisher[['journal', 'publisher']] -journal_publisher_ids = journal_publisher_ids.rename(columns={'journal': 'id'}) -journal_publisher_ids['publisher'] = journal_publisher_ids['publisher'].astype(str) -journal_publisher_ids -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idpublisher
011
122
233
344
445
.........
94099747
94199875
9429998
9431000119
9441001217
-

945 rows × 2 columns

-
- - - - -```python -# concat valeurs avec même id -journal_publisher_grouped = journal_publisher_ids.groupby('id').agg({'publisher': lambda x: ', '.join(x)}) -journal_publisher_grouped -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher
id
11
22
33
44, 5
56
......
99747
99875
9998
1000119
1001217
-

911 rows × 1 columns

-
- - - - -```python -# recuperation de l'id du publisher -journals = pd.merge(journal, journal_publisher_grouped, on='id', how='left') -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountry...lockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_scoredoublon_issnloa_statuspublisher
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215...NaN0.0NaN0.0NaN0.0NaNNaN11
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236...NaN0.0preserved1.0NaN0.0NaN1.012
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236...PLoS One1.0NaN0.0NaN0.04.035714NaN53
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209...NaN0.0NaN0.0NaN0.0NaNNaN14, 5
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236...NaN0.0preserved1.0NaN0.0NaN1.016
..................................................................
9069970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234...NaN0.0preserved1.0NaN0.0NaNNaN147
9079980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236...NaN0.0preserved1.0NaN0.0NaNNaN175
9089991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483...Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaNNaN18
90910000960-14810960-1481Renewable energy19919999NaNRenew. energy124234...NaN0.0preserved1.0NaN0.0NaNNaN1119
91010010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236...NaN0.0NaN0.0NaN0.0NaNNaN1217
-

911 rows × 24 columns

-
- - - - -```python -# export csv -publisher.to_csv('sample/publishers_brut.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -publisher.to_excel('sample/publishers_brut.xlsx', index=False) -``` - - -```python -# export csv brut des journals -journals.to_csv('sample/journals_publishers_brut.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel brut -journals.to_excel('sample/journals_publishers_brut.xlsx', index=False) -``` - - -```python -# export csv brut des ids -journal_publisher_ids.to_csv('sample/journals_publishers_ids.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel brut des ids -journal_publisher_ids.to_excel('sample/journals_publishers_ids.xlsx', index=False) -``` diff --git a/import_scripts/04_oacct_publishers.py b/import_scripts/04_oacct_publishers.py deleted file mode 100644 index d18e1e59..00000000 --- a/import_scripts/04_oacct_publishers.py +++ /dev/null @@ -1,387 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. -# -# Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -# Date de dernière mise à jour : 16.07.2021 - -# ## Extraction des données des éditeurs -# -# Sources : -# 1. 
Données de ISSN.org (JSON) -# -# ### Format des données source -# -# * Noeud : "@graph" -# * spatial & publisher : -# * "@id": "resource/ISSN/0140-6736", -# * "spatial": [ -# "http://id.loc.gov/vocabulary/countries/ne", -# "https://www.iso.org/obp/ui/#iso:code:3166:NL" -# ], -# -# Exemple avec plusieurs éditeurs dans le temps : -# -# "publisher": [ -# "resource/ISSN/0140-6736#Publisher-Elsevier", -# "resource/ISSN/0140-6736#Publisher-J._Onwhyn" -# ], -# -# { -# "@id": "resource/ISSN/0140-6736#LatestPublicationEvent", -# "@type": "http://schema.org/PublicationEvent", -# "publishedBy": "resource/ISSN/0140-6736#Publisher-Elsevier", -# "location": "resource/ISSN/0140-6736#PublicationPlace-Amsterdam" -# }, -# -# { -# "@id": "resource/ISSN/0140-6736#Publisher-Elsevier", -# "@type": "http://schema.org/Organization", -# "name": "Elsevier" -# }, -# -# Exemple avec un seul éditeur dans le temps : -# -# "publisher": "resource/ISSN/0899-8418#Publisher-Wiley", -# -# { -# "@id": "resource/ISSN/0899-8418#EarliestPublicationEvent", -# "@type": "http://schema.org/PublicationEvent", -# "publishedBy": "resource/ISSN/0899-8418#Publisher-Wiley", -# "temporal": "c1989-", -# "location": [ -# "resource/ISSN/0899-8418#PublicationPlace-New_York", -# "resource/ISSN/0899-8418#PublicationPlace-Chichester" -# ] -# }, -# -# { -# "@id": "resource/ISSN/0899-8418#Publisher-Wiley", -# "@type": "http://schema.org/Organization", -# "name": "Wiley" -# }, -# -# Exemple avec une liste d'éditeurs finaux : -# -# { -# "@id": "resource/ISSN/2174-8454#LatestPublicationEvent", -# "@type": "http://schema.org/PublicationEvent", -# "publishedBy": [ -# "resource/ISSN/2174-8454#Publisher-The_Global_Studies_Institute_de_l’Université_de_Genève", -# "resource/ISSN/2174-8454#Publisher-Universitat_de_València,_Departamento_de_Teoría_de_los_Lenguajes_y_Ciencias_de_la_Comunicación" -# ], -# "location": "resource/ISSN/2174-8454#PublicationPlace-Valencia" -# }, - -# In[1]: - - -import pandas as pd -import csv -import json 
-import numpy as np -import os - - -# ## Table Publishers - -# In[2]: - - -# creation du DF -# 'country' supprimé pour l'ajouter aux journaux -# 'oa_status' supprimé pour le moment -col_names = ['id', - 'name', - 'publisher_id_issn', - ] -publisher_issn = pd.DataFrame(columns = col_names) -publisher_issn - - -# ## Table Journals - -# In[3]: - - -journal = pd.read_csv('sample/journals_brut.tsv', encoding='utf-8', header=0, sep='\t') -journal - - -# ## Table Journals Publishers - -# In[4]: - - -# creation du DF -col_names = ['journal', - 'publisher_id_issn' - ] -journal_publisher = pd.DataFrame(columns = col_names) -journal_publisher - - -# In[5]: - - -# extraction des informations à partir des données ISSN.org -for index, row in journal.iterrows(): - journal_id = row['id'] - journal_issn = row['issn'] - if (((index/10) - int(index/10)) == 0) : - print(index) - # initialisation des variables à extraire - publisher_name = '' - publisher_country = '' - publisher_id = '' - publisher_id_first = '' - publisher_id_last = '' - # export en json - if os.path.exists('issn/data/' + journal_issn + '.json'): - with open('issn/data/' + journal_issn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - for x in data['@graph']: - if ('@id' in x): - if (x['@id'] == 'resource/ISSN/' + journal_issn + '#LatestPublicationEvent'): - if ('publishedBy' in x): - publisher_id_last = x['publishedBy'] - elif (x['@id'] == 'resource/ISSN/' + journal_issn + '#EarliestPublicationEvent'): - if ('publishedBy' in x): - publisher_id_first = x['publishedBy'] - if (publisher_id_last != ''): - publisher_id = publisher_id_last - else : - publisher_id = publisher_id_first - if type(publisher_id) is list: - for pid in publisher_id: - if (pid != ''): - for x in data['@graph']: - if ('@id' in x): - if (x['@id'] == pid): - if ('name' in x): - publisher_name = x['name'] - publisher_issn = publisher_issn.append({'publisher_id_issn' : pid, 'name' : publisher_name}, ignore_index=True) - journal_publisher 
= journal_publisher.append({'journal' : journal_id, 'publisher_id_issn' : pid}, ignore_index=True) - else : - if (publisher_id != ''): - for x in data['@graph']: - if ('@id' in x): - if (x['@id'] == publisher_id): - if ('name' in x): - publisher_name = x['name'] - publisher_issn = publisher_issn.append({'publisher_id_issn' : publisher_id, 'name' : publisher_name}, ignore_index=True) - journal_publisher = journal_publisher.append({'journal' : journal_id, 'publisher_id_issn' : publisher_id}, ignore_index=True) - else : - print(row['issn'] + ' - pas trouvé') - - -# In[6]: - - -publisher_issn - - -# In[7]: - - -# simlification des IDs -publisher_issn[['publisher_id_racine', 'publisher_id_fin']] = publisher_issn['publisher_id_issn'].str.split('#Publisher-', n=1, expand=True) -publisher_issn - - -# In[8]: - - -# simplifications -del publisher_issn['publisher_id_issn'] -del publisher_issn['publisher_id_racine'] -del publisher_issn['id'] -publisher_issn = publisher_issn.rename(columns={'publisher_id_fin': 'publisher_id_issn'}) -publisher_issn - - -# In[9]: - - -# supprimer les crochets et supprimer les doublons -# publisher['publisher_id'] = publisher['publisher_id'].str.replace('[', '') -# publisher['publisher_id'] = publisher['publisher_id'].str.replace(']', '') -# publisher['name'] = publisher['name'].str.replace('[', '') -# publisher['name'] = publisher['name'].str.replace(']', '') -publisher_issn = publisher_issn.drop_duplicates(subset=['publisher_id_issn']) -publisher_issn - - -# In[10]: - - -# test publishers sans nom -publisher_issn.loc[publisher_issn['name'] == ''] - - -# In[11]: - - -journal_publisher - - -# In[12]: - - -# simlification des IDs -journal_publisher[['publisher_id_racine', 'publisher_id_fin']] = journal_publisher['publisher_id_issn'].str.split('#Publisher-', n=1, expand=True) -# simplifications -del journal_publisher['publisher_id_issn'] -del journal_publisher['publisher_id_racine'] -journal_publisher = 
journal_publisher.rename(columns={'publisher_id_fin': 'publisher_id_issn'}) -journal_publisher - - -# In[13]: - - -# merge avec journals -journal_publisher = pd.merge(journal_publisher, publisher_issn, on='publisher_id_issn', how='left') -journal_publisher - - -# In[14]: - - -journal_publisher = journal_publisher.rename(columns={'publisher_id_issn': 'publisher_id'}) -journal_publisher - - -# In[15]: - - -publisher = journal_publisher[['publisher_id', 'name']] -publisher - - -# In[16]: - - -# supprimer les doublons -publisher = publisher.drop_duplicates(subset='publisher_id') -publisher - - -# In[17]: - - -# convertir l'index en id -publisher = publisher.reset_index() -# ajout de l'id avec l'index + 1 -publisher['id'] = publisher['index'] + 1 -del publisher['index'] -publisher - - -# In[18]: - - -# convertir l'index en id -publisher = publisher.reset_index() -# ajout de l'id avec l'index + 1 -publisher['id'] = publisher['index'] + 1 -del publisher['index'] -publisher - - -# In[19]: - - -# ajout de la valeur UNKNOWN -# 'country': 999999 -publisher = publisher.append({'id' : 999999, 'name' : 'UNKNOWN', 'publisher_id': '999999'}, ignore_index=True) -publisher - - -# In[20]: - - -# recuperation de l'id du publisher -journal_publisher = pd.merge(journal_publisher, publisher[['publisher_id', 'id']], on='publisher_id', how='left') -journal_publisher - - -# In[21]: - - -journal_publisher = journal_publisher.rename(columns={'id': 'publisher'}) -journal_publisher - - -# In[22]: - - -# ajout du publisher id au journals_brut -journal_publisher_ids = journal_publisher[['journal', 'publisher']] -journal_publisher_ids = journal_publisher_ids.rename(columns={'journal': 'id'}) -journal_publisher_ids['publisher'] = journal_publisher_ids['publisher'].astype(str) -journal_publisher_ids - - -# In[23]: - - -# concat valeurs avec même id -journal_publisher_grouped = journal_publisher_ids.groupby('id').agg({'publisher': lambda x: ', '.join(x)}) -journal_publisher_grouped - - -# In[24]: - - 
-# recuperation de l'id du publisher -journals = pd.merge(journal, journal_publisher_grouped, on='id', how='left') -journals - - -# In[25]: - - -# export csv -publisher.to_csv('sample/publishers_brut.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[26]: - - -# export excel -publisher.to_excel('sample/publishers_brut.xlsx', index=False) - - -# In[27]: - - -# export csv brut des journals -journals.to_csv('sample/journals_publishers_brut.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[28]: - - -# export excel brut -journals.to_excel('sample/journals_publishers_brut.xlsx', index=False) - - -# In[29]: - - -# export csv brut des ids -journal_publisher_ids.to_csv('sample/journals_publishers_ids.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[30]: - - -# export excel brut des ids -journal_publisher_ids.to_excel('sample/journals_publishers_ids.xlsx', index=False) - diff --git a/import_scripts/05_oacct_issns.md b/import_scripts/05_oacct_issns.md deleted file mode 100644 index 39a18fd7..00000000 --- a/import_scripts/05_oacct_issns.md +++ /dev/null @@ -1,2109 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. - -Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -Date de dernière mise à jour : 16.07.2021 - -## Table ISSNs - - -```python -import pandas as pd -import csv -import json -import numpy as np -import os -``` - - -```python -# ajout des ISSN-L -issns = pd.read_csv('issn/20171102.ISSN-to-ISSN-L.txt', encoding='utf-8', header=0, sep='\t') -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ISSNISSN-L
00000-00190000-0019
10000-00270000-0027
20000-00430000-0043
30000-00510000-0051
40000-006X0000-006X
.........
19959138756-99578756-9957
19959148756-99658756-9965
19959158756-99738756-9973
19959168756-99818756-9981
19959178756-999X8756-999X
-

1995918 rows × 2 columns

-
- - - - -```python -# renommer les colonnes -issns = issns.rename(columns={'ISSN' : 'issn', 'ISSN-L' : 'issnl'}) -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnl
00000-00190000-0019
10000-00270000-0027
20000-00430000-0043
30000-00510000-0051
40000-006X0000-006X
.........
19959138756-99578756-9957
19959148756-99658756-9965
19959158756-99738756-9973
19959168756-99818756-9981
19959178756-999X8756-999X
-

1995918 rows × 2 columns

-
- - - - -```python -journals = pd.read_csv('sample/journals_brut.tsv', encoding='utf-8', sep='\t', usecols=(['id', 'issn', 'issnl'])) -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnl
011660-93791660-9379
120031-90070031-9007
231932-62031932-6203
342174-84542174-8454
451098-01211098-0121
............
9069970964-17260964-1726
9079980022-34680022-3468
9089991432-20640178-8051
90910000960-14810960-1481
91010010161-75670161-7567
-

911 rows × 3 columns

-
- - - - -```python -# renomer les colonnes id -journals = journals.rename(columns = {'id' : 'journal'}) -journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnissnl
011660-93791660-9379
120031-90070031-9007
231932-62031932-6203
342174-84542174-8454
451098-01211098-0121
............
9069970964-17260964-1726
9079980022-34680022-3468
9089991432-20640178-8051
90910000960-14810960-1481
91010010161-75670161-7567
-

911 rows × 3 columns

-
- - - - -```python -# test journals sans issn -journals.loc[journals['issn'].isna()] -``` - - - - -
- - - - - - - - - - - - -
journalissnissnl
-
- - - - -```python -journals.loc[journals['journal'] == 5] -``` - - - - -
- - - - - - - - - - - - - - - - - - -
journalissnissnl
451098-01211098-0121
-
- - - -## Extraction du format - - -```python -# creation du DF -col_names = ['issn', - 'format' - ] -journals_format = pd.DataFrame(columns = col_names) -journals_format -``` - - - - -
- - - - - - - - - - - -
issnformat
-
- - - - -```python -# extraction des informations à partir des données ISSN.org -for index, row in journals.iterrows(): - # myid = row['journal'] - myissn = row['issn'] - # myissnl = row['issnl'] - if (((index/10) - int(index/10)) == 0) : - print(index) - # initialisation des variables à extraire - myformat = np.nan - # export en json - if os.path.exists('issn/data/' + myissn + '.json'): - with open('issn/data/' + myissn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - for x in data['@graph']: - if ('@id' in x): - if (x['@id'] == 'resource/ISSN/' + myissn): - if ('format' in x): - myformats = x['format'] - if type(myformats) is list: - myformat = myformats[0].replace('vocabularies/medium#', '') - else : - myformat = myformats.replace('vocabularies/medium#', '') - # journals_format.at[index,'journal'] = myid - journals_format.at[index,'issn'] = myissn - # journals2.at[index,'issnl'] = myissnl - journals_format.at[index,'format'] = myformat - else : - print(row['issn'] + ' - pas trouvé') -``` - - 0 - 10 - 20 - 30 - 40 - 50 - 60 - 70 - 80 - 90 - 100 - 110 - 120 - 130 - 140 - 150 - 160 - 170 - 180 - 190 - 200 - 210 - 220 - 230 - 240 - 250 - 260 - 270 - 280 - 290 - 300 - 310 - 320 - 330 - 340 - 350 - 360 - 370 - 380 - 390 - 400 - 410 - 420 - 430 - 440 - 450 - 460 - 470 - 480 - 490 - 500 - 510 - 520 - 530 - 540 - 550 - 560 - 570 - 580 - 590 - 600 - 610 - 620 - 630 - 640 - 650 - 660 - 670 - 680 - 690 - 700 - 710 - 720 - 730 - 740 - 750 - 760 - 770 - 780 - 790 - 800 - 810 - 820 - 830 - 840 - 850 - 860 - 870 - 880 - 890 - 900 - 910 - - - -```python -journals_format -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnformat
01660-9379Print
10031-9007Print
21932-6203Online
32174-8454Print
41098-0121Print
.........
9060964-1726Print
9070022-3468Print
9081432-2064Online
9090960-1481Print
9100161-7567Print
-

911 rows × 2 columns

-
- - - - -```python -# test -journals_format.loc[journals_format['format'].isnull()] -``` - - - - -
- - - - - - - - - - - -
issnformat
-
- - - - -```python -journals_format['format'].value_counts() -``` - - - - - Print 817 - Online 92 - Other 2 - Name: format, dtype: int64 - - - - -```python -del journals['issn'] -``` - - -```python -issns = pd.merge(issns, journals, on='issnl', how='outer') -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournal
00000-00190000-0019NaN
12150-40080000-0019NaN
20000-00270000-0027NaN
30000-00430000-0043NaN
40000-00510000-0051NaN
............
19959158756-99738756-9973NaN
19959168756-99818756-9981NaN
19959178756-999X8756-999XNaN
1995918NaN2624-8557120.0
1995919NaN0032-1052936.0
-

1995920 rows × 3 columns

-
- - - - -```python -# tester les lignes sans issn -issns.loc[issns['issn'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournal
1995918NaN2624-8557120.0
1995919NaN0032-1052936.0
-
- - - - -```python -# garder les lilgnes non null -issns = issns.loc[issns['issn'].notna()] -``` - - -```python -# isoler les lignes avec marge -issns2 = issns.loc[issns['journal'].notna()] -issns2 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournal
3340001-28150001-2815532.0
3351399-00390001-2815532.0
4930001-48420001-4842498.0
4941520-48980001-4842498.0
5050001-49660001-4966789.0
............
19213522470-00452470-0045533.0
19213532470-00532470-0045533.0
19257402475-99532475-9953608.0
19518542504-44272504-4427994.0
19518552504-44352504-4427994.0
-

1760 rows × 3 columns

-
- - - - -```python -# ajout du format par ISSN -issns2 = pd.merge(issns2, journals_format, on='issn', how='outer') -issns2 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformat
00001-28150001-2815532.0Print
11399-00390001-2815532.0NaN
20001-48420001-4842498.0Print
31520-48980001-4842498.0NaN
40001-49660001-4966789.0Print
...............
17582504-44272504-4427994.0Print
17592504-44352504-4427994.0NaN
17602624-8557NaNNaNOnline
17612469-9926NaNNaNPrint
17621529-4242NaNNaNOnline
-

1763 rows × 4 columns

-
- - - - -```python -# isoler les lignes avec marge -issns2 = issns2.loc[issns2['journal'].notna()] -issns2 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformat
00001-28150001-2815532.0Print
11399-00390001-2815532.0NaN
20001-48420001-4842498.0Print
31520-48980001-4842498.0NaN
40001-49660001-4966789.0Print
...............
17552470-00452470-0045533.0Other
17562470-00532470-0045533.0NaN
17572475-99532475-9953608.0Online
17582504-44272504-4427994.0Print
17592504-44352504-4427994.0NaN
-

1760 rows × 4 columns

-
- - - - -```python -issns2['format'] = issns2['format'].str.upper() -issns2['format'] = issns2['format'].str.replace('ONLINE', 'ELECTRONIC') -# DigitalCarrier -issns2['format'] = issns2['format'].str.replace('DIGITALCARRIER', 'ELECTRONIC') -issns2 -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - """Entry point for launching an IPython kernel. - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:4: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - after removing the cwd from sys.path. - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformat
00001-28150001-2815532.0PRINT
11399-00390001-2815532.0NaN
20001-48420001-4842498.0PRINT
31520-48980001-4842498.0NaN
40001-49660001-4966789.0PRINT
...............
17552470-00452470-0045533.0OTHER
17562470-00532470-0045533.0NaN
17572475-99532475-9953608.0ELECTRONIC
17582504-44272504-4427994.0PRINT
17592504-44352504-4427994.0NaN
-

1760 rows × 4 columns

-
- - - - -```python -issns2['format'].value_counts() -``` - - - - - PRINT 816 - ELECTRONIC 90 - OTHER 2 - Name: format, dtype: int64 - - - - -```python -# tester les lignes sans issn -issns2.loc[issns2['format'].isnull()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformat
11399-00390001-2815532.0NaN
31520-48980001-4842498.0NaN
51520-85240001-4966789.0NaN
61520-90240001-4966789.0NaN
80942-09400001-6268166.0NaN
...............
17502469-99342469-9926870.0NaN
17522469-99692469-995041.0NaN
17542470-00292470-001080.0NaN
17562470-00532470-0045533.0NaN
17592504-44352504-4427994.0NaN
-

852 rows × 4 columns

-
- - - - -```python -# attribution de l'id du type -# PRINT = 1 -# ELECTRONIC = 2 -# OTHER = 3 -issns2['issn_type'] = issns2['format'] -issns2['issn_type'] = issns2['issn_type'].str.replace('PRINT', '1') -issns2['issn_type'] = issns2['issn_type'].str.replace('ELECTRONIC', '2') -issns2['issn_type'] = issns2['issn_type'].str.replace('OTHER', '3') -issns2['issn_type'] = issns2['issn_type'].fillna(3) -issns2 -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:5: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - """ - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:6: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:7: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - import sys - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:8: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. 
- Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:9: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - if __name__ == '__main__': - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_type
00001-28150001-2815532.0PRINT1
11399-00390001-2815532.0NaN3
20001-48420001-4842498.0PRINT1
31520-48980001-4842498.0NaN3
40001-49660001-4966789.0PRINT1
..................
17552470-00452470-0045533.0OTHER3
17562470-00532470-0045533.0NaN3
17572475-99532475-9953608.0ELECTRONIC2
17582504-44272504-4427994.0PRINT1
17592504-44352504-4427994.0NaN3
-

1760 rows × 5 columns

-
- - - - -```python -# convertir journal en int -issns2['journal'] = issns2['journal'].astype(int) -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - - - -```python -# convertir l'index en id -issns2 = issns2.reset_index() -issns2['id'] = issns2['index'] + 1 -del issns2['index'] -issns2 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeid
00001-28150001-2815532PRINT11
11399-00390001-2815532NaN32
20001-48420001-4842498PRINT13
31520-48980001-4842498NaN34
40001-49660001-4966789PRINT15
.....................
17552470-00452470-0045533OTHER31756
17562470-00532470-0045533NaN31757
17572475-99532475-9953608ELECTRONIC21758
17582504-44272504-4427994PRINT11759
17592504-44352504-4427994NaN31760
-

1760 rows × 6 columns

-
- - - - -```python -issns2['issn_type'] = issns2['issn_type'].astype(int) -``` - - -```python -# supprimer les doublons par ISSN -issns2 = issns2.drop_duplicates(subset='issn') -issns2 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeid
00001-28150001-2815532PRINT11
11399-00390001-2815532NaN32
20001-48420001-4842498PRINT13
31520-48980001-4842498NaN34
40001-49660001-4966789PRINT15
.....................
17552470-00452470-0045533OTHER31756
17562470-00532470-0045533NaN31757
17572475-99532475-9953608ELECTRONIC21758
17582504-44272504-4427994PRINT11759
17592504-44352504-4427994NaN31760
-

1760 rows × 6 columns

-
- - - - -```python -# export csv -issns2.to_csv('sample/issn_brut.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -issns2.to_excel('sample/issn_brut.xlsx', index=False) -``` - - -```python -# export CSV des IDs -issns2[['id', 'issn', 'issnl', 'journal']].to_csv('sample/issn_ids.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel des IDs -issns2[['id', 'issn', 'issnl', 'journal']].to_excel('sample/issn_ids.xlsx', index=False) -``` diff --git a/import_scripts/05_oacct_issns.py b/import_scripts/05_oacct_issns.py deleted file mode 100644 index af282efb..00000000 --- a/import_scripts/05_oacct_issns.py +++ /dev/null @@ -1,280 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. 
-# -# Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -# Date de dernière mise à jour : 16.07.2021 - -# ## Table ISSNs - -# In[1]: - - -import pandas as pd -import csv -import json -import numpy as np -import os - - -# In[2]: - - -# ajout des ISSN-L -issns = pd.read_csv('issn/20171102.ISSN-to-ISSN-L.txt', encoding='utf-8', header=0, sep='\t') -issns - - -# In[3]: - - -# renommer les colonnes -issns = issns.rename(columns={'ISSN' : 'issn', 'ISSN-L' : 'issnl'}) -issns - - -# In[4]: - - -journals = pd.read_csv('sample/journals_brut.tsv', encoding='utf-8', sep='\t', usecols=(['id', 'issn', 'issnl'])) -journals - - -# In[5]: - - -# renomer les colonnes id -journals = journals.rename(columns = {'id' : 'journal'}) -journals - - -# In[6]: - - -# test journals sans issn -journals.loc[journals['issn'].isna()] - - -# In[7]: - - -journals.loc[journals['journal'] == 5] - - -# ## Extraction du format - -# In[8]: - - -# creation du DF -col_names = ['issn', - 'format' - ] -journals_format = pd.DataFrame(columns = col_names) -journals_format - - -# In[9]: - - -# extraction des informations à partir des données ISSN.org -for index, row in journals.iterrows(): - # myid = row['journal'] - myissn = row['issn'] - # myissnl = row['issnl'] - if (((index/10) - int(index/10)) == 0) : - print(index) - # initialisation des variables à extraire - myformat = np.nan - # export en json - if os.path.exists('issn/data/' + myissn + '.json'): - with open('issn/data/' + myissn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - for x in data['@graph']: - if ('@id' in x): - if (x['@id'] == 'resource/ISSN/' + myissn): - if ('format' in x): - myformats = x['format'] - if type(myformats) is list: - myformat = myformats[0].replace('vocabularies/medium#', '') - else : - myformat = myformats.replace('vocabularies/medium#', '') - # journals_format.at[index,'journal'] = myid - journals_format.at[index,'issn'] = myissn - # journals2.at[index,'issnl'] = myissnl - 
journals_format.at[index,'format'] = myformat - else : - print(row['issn'] + ' - pas trouvé') - - -# In[10]: - - -journals_format - - -# In[11]: - - -# test -journals_format.loc[journals_format['format'].isnull()] - - -# In[12]: - - -journals_format['format'].value_counts() - - -# In[13]: - - -del journals['issn'] - - -# In[14]: - - -issns = pd.merge(issns, journals, on='issnl', how='outer') -issns - - -# In[15]: - - -# tester les lignes sans issn -issns.loc[issns['issn'].isna()] - - -# In[16]: - - -# garder les lilgnes non null -issns = issns.loc[issns['issn'].notna()] - - -# In[17]: - - -# isoler les lignes avec marge -issns2 = issns.loc[issns['journal'].notna()] -issns2 - - -# In[18]: - - -# ajout du format par ISSN -issns2 = pd.merge(issns2, journals_format, on='issn', how='outer') -issns2 - - -# In[19]: - - -# isoler les lignes avec marge -issns2 = issns2.loc[issns2['journal'].notna()] -issns2 - - -# In[20]: - - -issns2['format'] = issns2['format'].str.upper() -issns2['format'] = issns2['format'].str.replace('ONLINE', 'ELECTRONIC') -# DigitalCarrier -issns2['format'] = issns2['format'].str.replace('DIGITALCARRIER', 'ELECTRONIC') -issns2 - - -# In[21]: - - -issns2['format'].value_counts() - - -# In[22]: - - -# tester les lignes sans issn -issns2.loc[issns2['format'].isnull()] - - -# In[23]: - - -# attribution de l'id du type -# PRINT = 1 -# ELECTRONIC = 2 -# OTHER = 3 -issns2['issn_type'] = issns2['format'] -issns2['issn_type'] = issns2['issn_type'].str.replace('PRINT', '1') -issns2['issn_type'] = issns2['issn_type'].str.replace('ELECTRONIC', '2') -issns2['issn_type'] = issns2['issn_type'].str.replace('OTHER', '3') -issns2['issn_type'] = issns2['issn_type'].fillna(3) -issns2 - - -# In[24]: - - -# convertir journal en int -issns2['journal'] = issns2['journal'].astype(int) - - -# In[25]: - - -# convertir l'index en id -issns2 = issns2.reset_index() -issns2['id'] = issns2['index'] + 1 -del issns2['index'] -issns2 - - -# In[26]: - - -issns2['issn_type'] = 
issns2['issn_type'].astype(int) - - -# In[27]: - - -# supprimer les doublons par ISSN -issns2 = issns2.drop_duplicates(subset='issn') -issns2 - - -# In[28]: - - -# export csv -issns2.to_csv('sample/issn_brut.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[29]: - - -# export excel -issns2.to_excel('sample/issn_brut.xlsx', index=False) - - -# In[30]: - - -# export CSV des IDs -issns2[['id', 'issn', 'issnl', 'journal']].to_csv('sample/issn_ids.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[31]: - - -# export excel des IDs -issns2[['id', 'issn', 'issnl', 'journal']].to_excel('sample/issn_ids.xlsx', index=False) - diff --git a/import_scripts/06_oacct_sherpa.md b/import_scripts/06_oacct_sherpa.md deleted file mode 100644 index b7077461..00000000 --- a/import_scripts/06_oacct_sherpa.md +++ /dev/null @@ -1,9819 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet d'extraire les données de Sherpa/Romeo obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. 
- -Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -Date de dernière mise à jour : 16.07.2021 - -## Données de Sherpa/Romeo - -### Exemple - -https://v2.sherpa.ac.uk/cgi/retrieve_by_id?item-type=publication&api-key=EEE6F146-678E-11EB-9C3A-202F3DE2659A&format=Json&identifier=17601 - - -```python -import pandas as pd -import csv -import json -import numpy as np -import os -# afficher toutes les colonnes -pd.set_option('display.max_columns', None) -``` - -## Table publisher_sherpa - - -```python -# creation du DF -col_names = ['journal', - 'publisher_id', - 'name', - 'country', - 'type', - 'url' - ] -publisher_sherpa = pd.DataFrame(columns = col_names) -publisher_sherpa -``` - - - - -
- - - - - - - - - - - - - - - -
journalpublisher_idnamecountrytypeurl
-
- - - -## Table sherpa match issn - - -```python -# creation du DF -col_names = ['issn', - 'sherpa_match', - ] -sherpa_match_issn = pd.DataFrame(columns = col_names) -sherpa_match_issn -``` - - - - -
- - - - - - - - - - - -
issnsherpa_match
-
- - - -## Table sherpa issns - - -```python -# creation du DF -col_names = ['issn', - 'type', - ] -sherpa_issn = pd.DataFrame(columns = col_names) -sherpa_issn -``` - - - - -
- - - - - - - - - - - -
issntype
-
- - - -## Table sherpa journals - - -```python -# creation du DF -col_names = ['journal', - 'title', - 'url', - ] -sherpa_journal = pd.DataFrame(columns = col_names) -sherpa_journal -``` - - - - -
- - - - - - - - - - - - -
journaltitleurl
-
- - - -## Import table Journals et ISSN - - -```python -journal = pd.read_csv('sample/journals_publishers_brut.tsv', encoding='utf-8', header=0, sep='\t') -journal -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitlestarting_yearend_yearurlname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_scoredoublon_issnloa_statuspublisher
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0NaNNaN11
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaN1.012
231932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1.0Yes1.0PLoS One1.0NaN0.0NaN0.04.035714NaN53
342174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0NaNNaN14, 5
451098-01211098-0121Physical review. B, Condensed matter and mater...19982015http://ojps.aip.org/prbo/Phys. rev., B, Condens. matter mater. phys.124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaN1.016
...........................................................................
9069970964-17260964-1726Smart materials and structures (Print)19929999NaNSmart mater. struct. (Print)124234NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaNNaN147
9079980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaNNaN175
9089991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaNNaN18
90910000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaNNaN1119
91010010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0NaNNaN1217
-

911 rows × 24 columns

-
- - - - -```python -issn = pd.read_csv('sample/issn_brut.tsv', encoding='utf-8', header=0, sep='\t') -issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeid
00001-28150001-2815532PRINT11
11399-00390001-2815532NaN32
20001-48420001-4842498PRINT13
31520-48980001-4842498NaN34
40001-49660001-4966789PRINT15
.....................
17552470-00452470-0045533OTHER31756
17562470-00532470-0045533NaN31757
17572475-99532475-9953608ELECTRONIC21758
17582504-44272504-4427994PRINT11759
17592504-44352504-4427994NaN31760
-

1760 rows × 6 columns

-
- - - - -```python -issn_ids = pd.read_csv('sample/issn_ids.tsv', encoding='utf-8', header=0, sep='\t') -issn_ids -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnljournal
010001-28150001-2815532
121399-00390001-2815532
230001-48420001-4842498
341520-48980001-4842498
450001-49660001-4966789
...............
175517562470-00452470-0045533
175617572470-00532470-0045533
175717582475-99532475-9953608
175817592504-44272504-4427994
175917602504-44352504-4427994
-

1760 rows × 4 columns

-
- - - -## Extraction de Sherpa Romeo - - -```python -# extraction des informations à partir des données Sherpa/Romeo -for index, row in issn.iterrows(): - journal_id = row['journal'] - journal_issn = row['issn'] - # if (((index/10) - int(index/10)) == 0) : - # print(index) - # initialisation des variables à extraire - publisher_id = np.nan - publisher_name = '' - publisher_country = '' - publisher_type = '' - publisher_url = '' - # boucle des fichiers json - # test d'existance du fichier - # print(row['issn']) - if os.path.exists('sherpa/data/' + journal_issn + '.json'): - with open('sherpa/data/' + journal_issn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - if (len(data['items']) > 0): - publisher_id = data['items'][0]['publishers'][0]['publisher']['id'] - if ('country' in data['items'][0]['publishers'][0]['publisher']): - publisher_country = data['items'][0]['publishers'][0]['publisher']['country'] - if ('relationship_type' in data['items'][0]['publishers'][0]): - publisher_type = data['items'][0]['publishers'][0]['relationship_type'] - if ('url' in data['items'][0]['publishers'][0]['publisher']): - publisher_url = data['items'][0]['publishers'][0]['publisher']['url'] - if ('name' in data['items'][0]['publishers'][0]['publisher']['name'][0]): - publisher_name = data['items'][0]['publishers'][0]['publisher']['name'][0]['name'] - sherpa_match = 'OK' - publisher_sherpa = publisher_sherpa.append({'journal' : journal_id, 'publisher_id' : publisher_id, - 'name' : publisher_name, 'country' : publisher_country, - 'type' : publisher_type, 'url' : publisher_url}, ignore_index=True) - else : - print(row['issn'] + ' - trouvé mais vide') - sherpa_match = 'empty' - else : - print(row['issn'] + ' - pas trouvé') - sherpa_match = 'missing' - sherpa_match_issn = sherpa_match_issn.append({'issn' : row['issn'], 'sherpa_match' : sherpa_match}, ignore_index=True) -``` - - 1399-0039 - pas trouvé - 1520-8524 - trouvé mais vide - 1520-9024 - pas trouvé - 1468-2834 - pas 
trouvé - 1551-2916 - pas trouvé - 1943-2984 - pas trouvé - 1555-7162 - trouvé mais vide - 2163-5773 - pas trouvé - 1873-4324 - trouvé mais vide - 1526-7598 - pas trouvé - 1673-3134 - pas trouvé - 1777-5884 - pas trouvé - 1528-1140 - pas trouvé - 1468-2060 - pas trouvé - 1552-6259 - pas trouvé - 0003-6935 - trouvé mais vide - 1520-8842 - pas trouvé - 0003-9926 - trouvé mais vide - 1538-3679 - pas trouvé - 0003-9942 - trouvé mais vide - 1538-3687 - pas trouvé - 1529-0131 - pas trouvé - 1090-2104 - trouvé mais vide - 1943-295X - pas trouvé - 1878-2434 - pas trouvé - 1873-2402 - trouvé mais vide - 1872-6240 - trouvé mais vide - 1365-2133 - pas trouvé - 0007-4403 - trouvé mais vide - 1968-3766 - pas trouvé - 0008-042X - trouvé mais vide - 2104-3329 - pas trouvé - 2268-7963 - pas trouvé - 1873-3948 - trouvé mais vide - 1873-4405 - trouvé mais vide - 1872-6836 - trouvé mais vide - 1873-4448 - trouvé mais vide - 1524-4571 - trouvé mais vide - 1873-7838 - trouvé mais vide - 1879-2944 - trouvé mais vide - 1873-3840 - trouvé mais vide - 1973-8102 - trouvé mais vide - 0011-1600 - trouvé mais vide - 1968-3901 - pas trouvé - 1879-2235 - trouvé mais vide - 1095-564X - trouvé mais vide - 1931-3543 - pas trouvé - 1385-013X - trouvé mais vide - 1873-3859 - trouvé mais vide - 1873-7315 - trouvé mais vide - 0013-8584 - trouvé mais vide - 2309-4672 - pas trouvé - 0014-2239 - trouvé mais vide - 2272-9011 - pas trouvé - 0945-5795 - pas trouvé - 1432-1033 - pas trouvé - 1365-2362 - pas trouvé - 1090-2422 - trouvé mais vide - 1026-7484 - trouvé mais vide - 1528-0012 - trouvé mais vide - 1872-9533 - trouvé mais vide - 0016-9161 - trouvé mais vide - 2297-7953 - pas trouvé - 1879-2189 - trouvé mais vide - 0018-0238 - trouvé mais vide - 2297-1971 - pas trouvé - 2334-3303 - pas trouvé - 1070-6313 - pas trouvé - 1873-3255 - trouvé mais vide - 1097-0215 - pas trouvé - 1879-2146 - trouvé mais vide - 0021-8170 - trouvé mais vide - 2114-6292 - pas trouvé - 1090-266X - trouvé mais vide - 1520-8850 - 
trouvé mais vide - 1879-1484 - trouvé mais vide - 1067-8832 - pas trouvé - 1067-8816 - pas trouvé - 1873-2380 - trouvé mais vide - 1090-2694 - trouvé mais vide - 1520-9032 - pas trouvé - 1873-3778 - trouvé mais vide - 1945-7197 - pas trouvé - 0021-9797 - trouvé mais vide - 1090-2716 - trouvé mais vide - 1873-5002 - pas trouvé - 0022-0728 - trouvé mais vide - 1879-2707 - trouvé mais vide - 1872-7883 - trouvé mais vide - 1527-2427 - trouvé mais vide - 1089-8638 - trouvé mais vide - 1873-4820 - trouvé mais vide - 1872-8561 - trouvé mais vide - 1531-5037 - trouvé mais vide - 1085-8695 - pas trouvé - 1097-6833 - pas trouvé - 1879-2553 - trouvé mais vide - 1097-6841 - pas trouvé - 2050-5639 - pas trouvé - 1873-4782 - trouvé mais vide - 1878-5883 - trouvé mais vide - 1085-8687 - pas trouvé - 1097-685X - pas trouvé - 1070-6321 - pas trouvé - 1091-756X - pas trouvé - 1939-5590 - trouvé mais vide - 1939-5604 - pas trouvé - 1873-1856 - trouvé mais vide - 1872-6143 - pas trouvé - 0025-6749 - trouvé mais vide - 1423-0356 - pas trouvé - 0026-4598 - pas trouvé - 1432-1874 - pas trouvé - 0027-4054 - trouvé mais vide - 1873-3514 - trouvé mais vide - 1873-0310 - trouvé mais vide - 1872-616X - pas trouvé - 1402-4896 - pas trouvé - 0031-8965 - trouvé mais vide - 1521-396X - pas trouvé - 1092-0145 - trouvé mais vide - 1873-3700 - pas trouvé - 1532-2548 - pas trouvé - 1527-2400 - trouvé mais vide - 0035-1121 - trouvé mais vide - 1760-7426 - pas trouvé - 0035-1784 - trouvé mais vide - 2297-1254 - pas trouvé - 0035-3655 - trouvé mais vide - 2104-385X - pas trouvé - 0036-7486 - trouvé mais vide - 1424-4004 - trouvé mais vide - 0036-7672 - trouvé mais vide - 0036-7699 - trouvé mais vide - 0036-7893 - trouvé mais vide - 2504-1452 - pas trouvé - 1471-1257 - pas trouvé - 1879-2766 - trouvé mais vide - 1879-2405 - trouvé mais vide - 1879-2758 - trouvé mais vide - 1464-5416 - pas trouvé - 1873-3581 - pas trouvé - 1664-2864 - pas trouvé - 1879-2731 - pas trouvé - 1534-6080 - trouvé mais vide - 
1873-2623 - pas trouvé - 1096-0341 - trouvé mais vide - 1878-5646 - trouvé mais vide - 1879-2448 - pas trouvé - 1879-1298 - trouvé mais vide - 1879-2138 - trouvé mais vide - 0046-2497 - trouvé mais vide - 1776-2936 - pas trouvé - 1873-7625 - trouvé mais vide - 1879-2472 - pas trouvé - 2214-8019 - trouvé mais vide - 0065-7727 - trouvé mais vide - 1070-6283 - pas trouvé - 0066-6653 - trouvé mais vide - 0072-0585 - trouvé mais vide - 1079-2376 - pas trouvé - 1557-7988 - trouvé mais vide - 0081-1254 - trouvé mais vide - 1523-1755 - pas trouvé - 1085-8725 - pas trouvé - 1097-6825 - trouvé mais vide - 1096-0260 - pas trouvé - 1522-8541 - pas trouvé - 1551-7616 - pas trouvé - 1935-0465 - pas trouvé - 1070-633X - pas trouvé - 1873-4375 - trouvé mais vide - 1070-6291 - pas trouvé - 0108-2701 - trouvé mais vide - 1600-5759 - pas trouvé - 1879-0097 - pas trouvé - 1879-2081 - pas trouvé - 1873-7323 - trouvé mais vide - 1879-3452 - trouvé mais vide - 1878-5905 - trouvé mais vide - 1532-1991 - pas trouvé - 1071-2763 - pas trouvé - 1071-8842 - pas trouvé - 2156-2202 - pas trouvé - 1081-1281 - pas trouvé - 1873-7528 - trouvé mais vide - 1773-0406 - trouvé mais vide - 0151-0193 - trouvé mais vide - 2101-0218 - trouvé mais vide - 0161-7567 - trouvé mais vide - 2160-9292 - trouvé mais vide - 1095-3795 - trouvé mais vide - 1872-678X - trouvé mais vide - 1573-2517 - pas trouvé - 1872-7557 - trouvé mais vide - 1872-7123 - trouvé mais vide - 1872-7441 - trouvé mais vide - 1872-7999 - pas trouvé - 1879-1514 - pas trouvé - 1874-1754 - trouvé mais vide - 1872-7697 - trouvé mais vide - 1873-5568 - trouvé mais vide - 1872-7352 - pas trouvé - 1872-9584 - trouvé mais vide - 1600-0641 - trouvé mais vide - 1872-9576 - trouvé mais vide - 1873-5460 - pas trouvé - 1873-5584 - trouvé mais vide - 1872-695X - pas trouvé - 1432-0827 - pas trouvé - 1432-1262 - pas trouvé - 0181-5512 - trouvé mais vide - 1773-0597 - pas trouvé - 1879-2367 - trouvé mais vide - 1532-2939 - trouvé mais vide - 1527-3296 - pas 
trouvé - 1558-1497 - trouvé mais vide - 0221-5918 - trouvé mais vide - 0248-8663 - trouvé mais vide - 1768-3122 - trouvé mais vide - 0252-1881 - trouvé mais vide - 0252-2969 - trouvé mais vide - 1661-5468 - pas trouvé - 0254-945X - trouvé mais vide - 1662-9760 - pas trouvé - 0255-9005 - trouvé mais vide - 0258-6800 - trouvé mais vide - 1432-0819 - pas trouvé - 0259-6199 - trouvé mais vide - 1661-3171 - trouvé mais vide - 1532-1983 - pas trouvé - 1873-2518 - trouvé mais vide - 1365-2346 - pas trouvé - 1476-5365 - pas trouvé - 1067-8824 - pas trouvé - 0271-4302 - trouvé mais vide - 2158-1525 - pas trouvé - 1536-4801 - pas trouvé - 1873-457X - pas trouvé - 1531-5053 - pas trouvé - 1470-8752 - pas trouvé - 1879-176X - pas trouvé - 1873-4421 - pas trouvé - 1432-1998 - pas trouvé - 1873-6246 - pas trouvé - 1873-6777 - pas trouvé - 1879-3533 - trouvé mais vide - 1872-8057 - trouvé mais vide - 1872-7972 - trouvé mais vide - 1879-2723 - trouvé mais vide - 1879-2774 - pas trouvé - 1873-4766 - trouvé mais vide - 1362-4954 - pas trouvé - 1365-2842 - pas trouvé - 1361-6447 - trouvé mais vide - 1872-9118 - trouvé mais vide - 1873-7544 - trouvé mais vide - 1873-3360 - pas trouvé - 1873-2100 - pas trouvé - 1872-9657 - trouvé mais vide - 1499-2752 - pas trouvé - 2567-689X - trouvé mais vide - 1432-1238 - pas trouvé - 1873-684X - trouvé mais vide - 1879-355X - trouvé mais vide - 1879-3487 - trouvé mais vide - 1873-6785 - trouvé mais vide - 1546-3141 - pas trouvé - 0362-1340 - trouvé mais vide - 1523-2867 - pas trouvé - 1558-1160 - trouvé mais vide - 1432-2323 - pas trouvé - 0365-7116 - trouvé mais vide - 1873-2526 - pas trouvé - 0368-4466 - trouvé mais vide - 1588-2926 - pas trouvé - 0369-3392 - trouvé mais vide - 1873-2445 - trouvé mais vide - 0373-2525 - trouvé mais vide - 0373-2967 - trouvé mais vide - 2235-3658 - pas trouvé - 0373-6156 - trouvé mais vide - 2391-1336 - pas trouvé - 0374-4256 - trouvé mais vide - 0375-1457 - trouvé mais vide - 2419-8196 - pas trouvé - 1873-2429 - 
trouvé mais vide - 1872-6097 - pas trouvé - 1872-6860 - trouvé mais vide - 1574-6968 - pas trouvé - 1879-0038 - trouvé mais vide - 1873-3476 - trouvé mais vide - 1873-2755 - trouvé mais vide - 1872-6178 - trouvé mais vide - 1873-2046 - trouvé mais vide - 1872-6283 - trouvé mais vide - 0398-3412 - trouvé mais vide - 2297-5810 - pas trouvé - 0409-8757 - trouvé mais vide - 1461-7412 - pas trouvé - 1873-1562 - trouvé mais vide - 1089-4918 - trouvé mais vide - 1538-4500 - pas trouvé - 0570-0833 - trouvé mais vide - 0583-8401 - trouvé mais vide - 1872-7727 - trouvé mais vide - 1873-264X - trouvé mais vide - 1527-7755 - pas trouvé - 1520-8559 - trouvé mais vide - 1558-3597 - trouvé mais vide - 1873-5134 - pas trouvé - 1096-3677 - pas trouvé - 2213-0276 - pas trouvé - 1958-5381 - pas trouvé - 1651-2227 - pas trouvé - 0884-1616 - trouvé mais vide - 1091-8876 - pas trouvé - 1092-8928 - pas trouvé - 1089-8646 - pas trouvé - 0888-8809 - trouvé mais vide - 1944-9917 - trouvé mais vide - 1532-0987 - pas trouvé - 0894-8275 - trouvé mais vide - 1878-5921 - pas trouvé - 1520-636X - pas trouvé - 1399-3038 - pas trouvé - 1873-7196 - trouvé mais vide - 1873-4308 - trouvé mais vide - 1573-2509 - trouvé mais vide - 1879-0658 - trouvé mais vide - 1873-2135 - pas trouvé - 1873-2143 - pas trouvé - 1873-4936 - trouvé mais vide - 1873-4944 - pas trouvé - 1872-793X - trouvé mais vide - 1873-3069 - pas trouvé - 1872-8286 - trouvé mais vide - 1873-3077 - pas trouvé - 1873-4669 - trouvé mais vide - 1873-3883 - trouvé mais vide - 0926-9630 - trouvé mais vide - 1879-8365 - trouvé mais vide - 1879-3398 - trouvé mais vide - 1873-4359 - trouvé mais vide - 1879-0720 - trouvé mais vide - 1769-664X - pas trouvé - 1432-2218 - pas trouvé - 1866-6817 - pas trouvé - 1432-2277 - pas trouvé - 1435-4373 - pas trouvé - 1433-2965 - pas trouvé - 1873-3441 - pas trouvé - 1362-3044 - pas trouvé - 1879-0526 - trouvé mais vide - 1879-0828 - pas trouvé - 1879-0410 - trouvé mais vide - 1873-619X - trouvé mais vide - 
1873-4235 - trouvé mais vide - 1362-511X - pas trouvé - 1879-0429 - trouvé mais vide - 1879-1786 - trouvé mais vide - 1879-0852 - pas trouvé - 1879-0682 - pas trouvé - 1873-2976 - trouvé mais vide - 1464-3405 - trouvé mais vide - 1466-1861 - pas trouvé - 1555-3892 - pas trouvé - 1360-0443 - pas trouvé - 1464-3391 - trouvé mais vide - 1879-2359 - pas trouvé - 0992-986X - trouvé mais vide - 2119-4130 - pas trouvé - 0995-3817 - trouvé mais vide - 2219-2840 - pas trouvé - 1010-2248 - trouvé mais vide - 1664-9885 - pas trouvé - 1873-2666 - pas trouvé - 1017-0588 - trouvé mais vide - 1018-7987 - trouvé mais vide - 1019-0406 - trouvé mais vide - 1023-2044 - trouvé mais vide - 1023-9332 - trouvé mais vide - 2235-1884 - pas trouvé - 1560-7917 - pas trouvé - 1026-7530 - pas trouvé - 1607-8489 - pas trouvé - 1127-2236 - pas trouvé - 1938-808X - pas trouvé - 1095-8657 - trouvé mais vide - 1536-3732 - pas trouvé - 1049-5258 - trouvé mais vide - 1538-4446 - pas trouvé - 1095-9572 - trouvé mais vide - 1532-6500 - trouvé mais vide - 1059-1524 - trouvé mais vide - 1095-3787 - trouvé mais vide - 1538-4519 - trouvé mais vide - 1063-6919 - trouvé mais vide - 2332-564X - pas trouvé - 2575-7075 - pas trouvé - 1940-6029 - trouvé mais vide - 1527-2435 - pas trouvé - 1527-2419 - pas trouvé - 1071-1023 - trouvé mais vide - 1520-8567 - pas trouvé - 1090-235X - trouvé mais vide - 1532-2130 - pas trouvé - 1096-0856 - trouvé mais vide - 1538-4489 - pas trouvé - 1155-4339 - trouvé mais vide - 1764-7177 - pas trouvé - 1460-9592 - pas trouvé - 1878-3511 - pas trouvé - 1778-7254 - pas trouvé - 1873-4030 - pas trouvé - 1873-2844 - trouvé mais vide - 1873-5126 - trouvé mais vide - 1873-5606 - pas trouvé - 1873-2453 - trouvé mais vide - 1872-8456 - pas trouvé - 2040-2058 - pas trouvé - 1878-5840 - trouvé mais vide - 1473-6519 - pas trouvé - 1879-0690 - trouvé mais vide - 1466-609X - pas trouvé - 1367-4811 - trouvé mais vide - 1873-4286 - pas trouvé - 1873-3212 - trouvé mais vide - 1873-1759 - pas 
trouvé - 1875-8908 - trouvé mais vide - 1872-8952 - trouvé mais vide - 1873-1902 - trouvé mais vide - 1600-0854 - pas trouvé - 1420-5556 - trouvé mais vide - 1420-7192 - trouvé mais vide - 1662-0879 - pas trouvé - 1422-2019 - trouvé mais vide - 1422-3449 - trouvé mais vide - 1422-5778 - trouvé mais vide - 2504-1436 - pas trouvé - 1423-3967 - trouvé mais vide - 1663-3997 - pas trouvé - 1424-1811 - trouvé mais vide - 2504-1460 - pas trouvé - 1424-4020 - pas trouvé - 1424-7410 - trouvé mais vide - 1424-7755 - trouvé mais vide - 1436-3771 - pas trouvé - 1434-6028 - trouvé mais vide - 1434-6036 - trouvé mais vide - 1439-4456 - pas trouvé - 1449-8979 - pas trouvé - 1873-6416 - trouvé mais vide - 1465-6914 - trouvé mais vide - 1478-6362 - pas trouvé - 1520-6149 - trouvé mais vide - 2379-190X - trouvé mais vide - 1522-1601 - pas trouvé - 1708-8208 - pas trouvé - 1944-7884 - pas trouvé - 1527-6473 - pas trouvé - 1947-3893 - pas trouvé - 1530-1591 - trouvé mais vide - 1558-1101 - pas trouvé - 1860-2002 - pas trouvé - 1552-5279 - pas trouvé - 1557-170X - trouvé mais vide - 1878-5530 - trouvé mais vide - 1878-1519 - trouvé mais vide - 1569-9293 - pas trouvé - 1873-376X - pas trouvé - 1720-8319 - pas trouvé - 1610-0379 - trouvé mais vide - 1610-0387 - pas trouvé - 1778-3569 - trouvé mais vide - 1660-3362 - trouvé mais vide - 1660-9379 - trouvé mais vide - 1660-9603 - trouvé mais vide - 1661-1179 - trouvé mais vide - 1661-2620 - trouvé mais vide - 1661-464X - trouvé mais vide - 1661-4941 - trouvé mais vide - 1661-8165 - pas trouvé - 1662-551X - pas trouvé - 1662-5536 - trouvé mais vide - 1662-6001 - trouvé mais vide - 1662-601X - pas trouvé - 1662-8705 - trouvé mais vide - 1777-5477 - trouvé mais vide - 1810-7621 - pas trouvé - 1863-2300 - pas trouvé - 1873-2763 - trouvé mais vide - 1876-7737 - pas trouvé - 1878-8769 - trouvé mais vide - 1939-5175 - trouvé mais vide - 1945-7928 - trouvé mais vide - 1945-7936 - pas trouvé - 1945-8452 - trouvé mais vide - 1992-2655 - trouvé mais 
vide - 2050-7534 - trouvé mais vide - 2101-6275 - pas trouvé - 2161-2129 - pas trouvé - 2160-5033 - trouvé mais vide - 2160-5041 - pas trouvé - 2160-9020 - trouvé mais vide - 2160-9047 - pas trouvé - 2164-3342 - trouvé mais vide - 2174-8454 - trouvé mais vide - 2340-115X - pas trouvé - 2211-3282 - trouvé mais vide - 2264-7228 - trouvé mais vide - 2297-0703 - trouvé mais vide - 2297-6981 - trouvé mais vide - 2297-7007 - pas trouvé - 2352-1791 - trouvé mais vide - 2504-4427 - trouvé mais vide - 2504-4435 - trouvé mais vide - - - -```python -publisher_sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalpublisher_idnamecountrytypeurl
053245John Wiley and Sonsgbformer_publisherhttp://www.wiley.com/
14984American Chemical Societyussociety_publisherhttp://pubs.acs.org/
24984American Chemical Societyussociety_publisherhttp://pubs.acs.org/
3789126Acoustical Society of Americaussociety_publisherhttp://acousticalsociety.org/
41663291Springergbcommercial_publisherhttps://www.springernature.com/gp/products/jou...
.....................
12388010American Physical Societyussociety_publisherhttp://www.aps.org/
12398010American Physical Societyussociety_publisherhttp://www.aps.org/
124053310American Physical Societyussociety_publisherhttp://www.aps.org/
124153310American Physical Societyussociety_publisherhttp://www.aps.org/
124260810American Physical Societyussociety_publisherhttp://www.aps.org/
-

1243 rows × 6 columns

-
- - - - -```python -sherpa_match_issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnsherpa_match
00001-2815OK
11399-0039missing
20001-4842OK
31520-4898OK
40001-4966OK
.........
17552470-0045OK
17562470-0053OK
17572475-9953OK
17582504-4427empty
17592504-4435empty
-

1760 rows × 2 columns

-
- - - - -```python -# dedup -publisher_sherpa_dedup = publisher_sherpa.drop_duplicates() -publisher_sherpa_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalpublisher_idnamecountrytypeurl
053245John Wiley and Sonsgbformer_publisherhttp://www.wiley.com/
14984American Chemical Societyussociety_publisherhttp://pubs.acs.org/
3789126Acoustical Society of Americaussociety_publisherhttp://acousticalsociety.org/
41663291Springergbcommercial_publisherhttps://www.springernature.com/gp/products/jou...
68073291Springergbcommercial_publisherhttps://www.springernature.com/gp/products/jou...
.....................
123587010American Physical Societyussociety_publisherhttp://www.aps.org/
12364110American Physical Societyussociety_publisherhttp://www.aps.org/
12388010American Physical Societyussociety_publisherhttp://www.aps.org/
124053310American Physical Societyussociety_publisherhttp://www.aps.org/
124260810American Physical Societyussociety_publisherhttp://www.aps.org/
-

808 rows × 6 columns

-
- - - - -```python -sherpa_match_issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnsherpa_match
00001-2815OK
11399-0039missing
20001-4842OK
31520-4898OK
40001-4966OK
.........
17552470-0045OK
17562470-0053OK
17572475-9953OK
17582504-4427empty
17592504-4435empty
-

1760 rows × 2 columns

-
- - - - -```python -# ajout du issnl et du titre -sherpa_match_issn = pd.merge(sherpa_match_issn, issn_ids, on='issn', how='left') -sherpa_match_issn = pd.merge(sherpa_match_issn, journal[['issnl', 'title']], on='issnl', how='left') -sherpa_match_issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnsherpa_matchidissnljournaltitle
00001-2815OK10001-2815532Tissue antigens
11399-0039missing20001-2815532Tissue antigens
20001-4842OK30001-4842498Accounts of chemical research
31520-4898OK40001-4842498Accounts of chemical research
40001-4966OK50001-4966789The Journal of the Acoustical Society of America
.....................
17552470-0045OK17562470-0045533Physical review. E (Print)
17562470-0053OK17572470-0045533Physical review. E (Print)
17572475-9953OK17582475-9953608Physical review materials
17582504-4427empty17592504-4427994GG@G (Print)
17592504-4435empty17602504-4427994GG@G (Print)
-

1760 rows × 6 columns

-
- - - - -```python -sherpa_match_results = sherpa_match_issn[['id', 'issnl', 'sherpa_match']].groupby(['issnl', 'sherpa_match']).count() -sherpa_match_results -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id
issnlsherpa_match
0001-2815OK1
missing1
0001-4842OK2
0001-4966OK1
empty1
.........
2469-9950OK2
2470-0010OK2
2470-0045OK2
2475-9953OK1
2504-4427empty2
-

1302 rows × 1 columns

-
- - - - -```python -sherpa_match_results = sherpa_match_results.reset_index() -sherpa_match_results -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnlsherpa_matchid
00001-2815OK1
10001-2815missing1
20001-4842OK2
30001-4966OK1
40001-4966empty1
............
12972469-9950OK2
12982470-0010OK2
12992470-0045OK2
13002475-9953OK1
13012504-4427empty2
-

1302 rows × 3 columns

-
- - - - -```python -sherpa_match_results_ok = sherpa_match_results.loc[sherpa_match_results['sherpa_match'] == 'OK'] -issn_ids_issnl = issn_ids[['issnl', 'journal']].drop_duplicates(subset='issnl') -issn_ids_issnl = pd.merge(issn_ids_issnl, sherpa_match_results_ok, on='issnl', how='left') -issn_ids_issnl = pd.merge(issn_ids_issnl, journal[['issnl', 'title']], on='issnl', how='left') -issn_ids_issnl -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnljournalsherpa_matchidtitle
00001-2815532OK1.0Tissue antigens
10001-4842498OK2.0Accounts of chemical research
20001-4966789OK1.0The Journal of the Acoustical Society of America
30001-6268166OK2.0Acta neurochirurgica
40001-6322807OK2.0Acta neuropathologica
..................
9042469-995041OK2.0Physical review. B
9052470-001080OK2.0Physical review. D
9062470-0045533OK2.0Physical review. E (Print)
9072475-9953608OK1.0Physical review materials
9082504-4427994NaNNaNGG@G (Print)
-

909 rows × 5 columns

-
- - - - -```python -journals_not_sherpa = issn_ids_issnl.loc[issn_ids_issnl['sherpa_match'].isna()] -journals_not_sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnljournalsherpa_matchidtitle
240003-6935398NaNNaNApplied optics
270003-9926605NaNNaNArchives of internal medicine (1960)
280003-9942974NaNNaNArchives of neurology (Chicago)
470007-4403885NaNNaNBulletin de psychologie
480008-042X180NaNNaNCahiers pédagogiques (Revue)
..................
8892264-7228503NaNNaNDistances et médiations des savoirs
8922297-0703989NaNNaNSchweizer Krebs-Bulletin
8932297-6981618NaNNaNSwiss archives of neurology, psychiatry and ps...
8982352-1791639NaNNaNNuclear materials and energy
9082504-4427994NaNNaNGG@G (Print)
-

101 rows × 5 columns

-
- - - - -```python -sherpa_match_results_empty = sherpa_match_results.loc[sherpa_match_results['sherpa_match'] == 'empty'] -sherpa_match_results_missing = sherpa_match_results.loc[sherpa_match_results['sherpa_match'] == 'missing'] -del journals_not_sherpa['sherpa_match'] -del journals_not_sherpa['id'] -journals_not_sherpa = pd.merge(journals_not_sherpa, sherpa_match_results_empty, on='issnl', how='left') -del journals_not_sherpa['id'] -journals_not_sherpa = pd.merge(journals_not_sherpa, sherpa_match_results_missing, on='issnl', how='left') -del journals_not_sherpa['id'] -journals_not_sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnljournaltitlesherpa_match_xsherpa_match_y
00003-6935398Applied opticsemptyNaN
10003-9926605Archives of internal medicine (1960)emptymissing
20003-9942974Archives of neurology (Chicago)emptymissing
30007-4403885Bulletin de psychologieemptymissing
40008-042X180Cahiers pédagogiques (Revue)emptymissing
..................
962264-7228503Distances et médiations des savoirsemptyNaN
972297-0703989Schweizer Krebs-BulletinemptyNaN
982297-6981618Swiss archives of neurology, psychiatry and ps...emptymissing
992352-1791639Nuclear materials and energyemptyNaN
1002504-4427994GG@G (Print)emptyNaN
-

101 rows × 5 columns

-
- - - - -```python -# extraction des informations des journaux à partir des données Sherpa/Romeo -for index, row in issn.iterrows(): - journal_id = row['journal'] - journal_issn = row['issn'] - # boucle des fichiers json - # test d'existance du fichier - # print(row['format']) - if (((index/10) - int(index/10)) == 0) : - print(index) - if os.path.exists('sherpa/data/' + journal_issn + '.json'): - with open('sherpa/data/' + journal_issn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - title = np.nan - url = np.nan - if (len(data['items']) > 0): - if ('url' in data['items'][0]): - url = data['items'][0]['url'] - if ('title' in data['items'][0]['title'][0]): - title = data['items'][0]['title'][0]['title'] - sherpa_journal = sherpa_journal.append({'journal' : journal_id, 'title' : title, 'url' : url}, ignore_index=True) -``` - - 0 - 10 - 20 - 30 - 40 - 50 - 60 - 70 - 80 - 90 - 100 - 110 - 120 - 130 - 140 - 150 - 160 - 170 - 180 - 190 - 200 - 210 - 220 - 230 - 240 - 250 - 260 - 270 - 280 - 290 - 300 - 310 - 320 - 330 - 340 - 350 - 360 - 370 - 380 - 390 - 400 - 410 - 420 - 430 - 440 - 450 - 460 - 470 - 480 - 490 - 500 - 510 - 520 - 530 - 540 - 550 - 560 - 570 - 580 - 590 - 600 - 610 - 620 - 630 - 640 - 650 - 660 - 670 - 680 - 690 - 700 - 710 - 720 - 730 - 740 - 750 - 760 - 770 - 780 - 790 - 800 - 810 - 820 - 830 - 840 - 850 - 860 - 870 - 880 - 890 - 900 - 910 - 920 - 930 - 940 - 950 - 960 - 970 - 980 - 990 - 1000 - 1010 - 1020 - 1030 - 1040 - 1050 - 1060 - 1070 - 1080 - 1090 - 1100 - 1110 - 1120 - 1130 - 1140 - 1150 - 1160 - 1170 - 1180 - 1190 - 1200 - 1210 - 1220 - 1230 - 1240 - 1250 - 1260 - 1270 - 1280 - 1290 - 1300 - 1310 - 1320 - 1330 - 1340 - 1350 - 1360 - 1370 - 1380 - 1390 - 1400 - 1410 - 1420 - 1430 - 1440 - 1450 - 1460 - 1470 - 1480 - 1490 - 1500 - 1510 - 1520 - 1530 - 1540 - 1550 - 1560 - 1570 - 1580 - 1590 - 1600 - 1610 - 1620 - 1630 - 1640 - 1650 - 1660 - 1670 - 1680 - 1690 - 1700 - 1710 - 1720 - 1730 - 1740 - 1750 - - - -```python 
-sherpa_journal -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journaltitleurl
0532Tissue Antigenshttp://onlinelibrary.wiley.com/journal/10.1111...
1498Accounts of Chemical Researchhttp://pubs.acs.org/journal/achre4
2498Accounts of Chemical Researchhttp://pubs.acs.org/journal/achre4
3789The Journal of the Acoustical Society of Americahttp://asa.scitation.org/journal/jas
4166Acta Neurochirurgicahttp://link.springer.com/journal/701
............
123880Physical Review Dhttp://prd.aps.org/
123980Physical Review Dhttp://prd.aps.org/
1240533Physical Review Ehttp://journals.aps.org/pre/abstract/10.1103/P...
1241533Physical Review Ehttp://journals.aps.org/pre/abstract/10.1103/P...
1242608Physical Review Materialshttp://journals.aps.org/prmaterials/
-

1243 rows × 3 columns

-
- - - - -```python -# extraction des informations à partir des données Sherpa/Romeo -for index, row in issn.iterrows(): - journal_id = row['journal'] - journal_issn = row['issn'] - # boucle des fichiers json - # test d'existance du fichier - # print(row['format']) - if (((index/10) - int(index/10)) == 0) : - print(index) - if os.path.exists('sherpa/data/' + journal_issn + '.json'): - with open('sherpa/data/' + journal_issn + '.json', 'r', encoding='utf-8') as f: - myissn = np.nan - mytype = np.nan - data = json.load(f) - if (len(data['items']) > 0): - if ('issns' in data['items'][0]): - issns = data['items'][0]['issns'] - for i in issns: - if ('issn' in i): - myissn = i['issn'] - if ('type' in i): - mytype = i['type'] - sherpa_issn = sherpa_issn.append({'issn' : myissn, 'type' : mytype}, ignore_index=True) -``` - - 0 - 10 - 20 - 30 - 40 - 50 - 60 - 70 - 80 - 90 - 100 - 110 - 120 - 130 - 140 - 150 - 160 - 170 - 180 - 190 - 200 - 210 - 220 - 230 - 240 - 250 - 260 - 270 - 280 - 290 - 300 - 310 - 320 - 330 - 340 - 350 - 360 - 370 - 380 - 390 - 400 - 410 - 420 - 430 - 440 - 450 - 460 - 470 - 480 - 490 - 500 - 510 - 520 - 530 - 540 - 550 - 560 - 570 - 580 - 590 - 600 - 610 - 620 - 630 - 640 - 650 - 660 - 670 - 680 - 690 - 700 - 710 - 720 - 730 - 740 - 750 - 760 - 770 - 780 - 790 - 800 - 810 - 820 - 830 - 840 - 850 - 860 - 870 - 880 - 890 - 900 - 910 - 920 - 930 - 940 - 950 - 960 - 970 - 980 - 990 - 1000 - 1010 - 1020 - 1030 - 1040 - 1050 - 1060 - 1070 - 1080 - 1090 - 1100 - 1110 - 1120 - 1130 - 1140 - 1150 - 1160 - 1170 - 1180 - 1190 - 1200 - 1210 - 1220 - 1230 - 1240 - 1250 - 1260 - 1270 - 1280 - 1290 - 1300 - 1310 - 1320 - 1330 - 1340 - 1350 - 1360 - 1370 - 1380 - 1390 - 1400 - 1410 - 1420 - 1430 - 1440 - 1450 - 1460 - 1470 - 1480 - 1490 - 1500 - 1510 - 1520 - 1530 - 1540 - 1550 - 1560 - 1570 - 1580 - 1590 - 1600 - 1610 - 1620 - 1630 - 1640 - 1650 - 1660 - 1670 - 1680 - 1690 - 1700 - 1710 - 1720 - 1730 - 1740 - 1750 - - - -```python -sherpa_issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issntype
00001-2815print
11399-0039electronic
20001-4842print
31520-4898electronic
40001-4842print
.........
21962470-0045print
21972470-0053electronic
21982470-0045print
21992470-0053electronic
22002475-9953electronic
-

2201 rows × 2 columns

-
- - - - -```python -# dedup -sherpa_issn = sherpa_issn.drop_duplicates() -sherpa_issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issntype
00001-2815print
11399-0039electronic
20001-4842print
31520-4898electronic
60001-4966print
.........
21922470-0010print
21932470-0029electronic
21962470-0045print
21972470-0053electronic
22002475-9953electronic
-

1333 rows × 2 columns

-
- - - - -```python -# completer le fichier des issns avec les types de sherpa -issn2 = pd.merge(issn, sherpa_issn, on='issn', how='left') -issn2 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
00001-28150001-2815532PRINT11print
11399-00390001-2815532NaN32electronic
20001-48420001-4842498PRINT13print
31520-48980001-4842498NaN34electronic
40001-49660001-4966789PRINT15print
........................
17552470-00452470-0045533OTHER31756print
17562470-00532470-0045533NaN31757electronic
17572475-99532475-9953608ELECTRONIC21758electronic
17582504-44272504-4427994PRINT11759NaN
17592504-44352504-4427994NaN31760NaN
-

1760 rows × 7 columns

-
- - - - -```python -# exports csv -publisher_sherpa_dedup.to_csv('sample/publisher_sherpa.tsv', sep='\t', encoding='utf-8', index=False) -sherpa_match_issn.to_csv('sample/sherpa_match_issn.tsv', sep='\t', encoding='utf-8', index=False) -sherpa_journal.to_csv('sample/sherpa_journal.tsv', sep='\t', encoding='utf-8', index=False) -issn2.to_csv('sample/issn_sherpa.tsv', sep='\t', encoding='utf-8', index=False) -journals_not_sherpa.to_csv('sample/journals_not_sherpa.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# exports excel -publisher_sherpa_dedup.to_excel('sample/publisher_sherpa.xlsx', index=False) -sherpa_match_issn.to_excel('sample/sherpa_match_issn.xlsx', index=False) -sherpa_journal.to_excel('sample/sherpa_journal.xlsx', index=False) -issn2.to_excel('sample/issn_sherpa.xlsx', index=False) -journals_not_sherpa.to_excel('sample/journals_not_sherpa.xlsx', index=False) -``` - - -```python -# ajout des titres Sherpa a la table des revues -# renommer les colonnes -sherpa_journal = sherpa_journal.rename(columns={'journal' : 'id'}) -journal = pd.merge(journal, sherpa_journal, on='id', how='left') -journal -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitle_xstarting_yearend_yearurl_xname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_scoredoublon_issnloa_statuspublishertitle_yurl_y
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0NaNNaN11NaNNaN
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaN1.012Physical Review Lettershttp://prl.aps.org/
220031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaN1.012Physical Review Lettershttp://prl.aps.org/
331932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1.0Yes1.0PLoS One1.0NaN0.0NaN0.04.035714NaN53PLoS ONEhttp://www.plosone.org/
442174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0NaNNaN14, 5NaNNaN
.................................................................................
13419980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaNNaN175Journal of Pediatric Surgeryhttp://www.jpedsurg.org/
13429991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaNNaN18Probability Theory and Related Fieldshttp://www.springerlink.com/content/100451/?p=...
13439991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaNNaN18Probability Theory and Related Fieldshttp://www.springerlink.com/content/100451/?p=...
134410000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaNNaN1119Renewable Energyhttp://www.elsevier.com/wps/product/cws_home/9...
134510010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0NaNNaN1217NaNNaN
-

1346 rows × 26 columns

-
- - - - -```python -# choix du titre et url -journal['url'] = journal['url_y'] -journal.loc[journal['url_y'].isna(), 'url'] = journal['url_x'] -journal['title'] = journal['title_y'] -journal.loc[journal['title_y'].isna(), 'title'] = journal['title_x'] -journal -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnissnltitle_xstarting_yearend_yearurl_xname_short_iso_4languagecountrydoaj_titledoaj_sealAPCdoaj_statuslockss_titlelockssportico_statusporticonlch_titlenlchqoam_av_scoredoublon_issnloa_statuspublishertitle_yurl_yurltitle
011660-93791660-9379Revue médicale suisse20059999NaNRev. méd. suisse138215NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0NaNNaN11NaNNaNNaNRevue médicale suisse
120031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaN1.012Physical Review Lettershttp://prl.aps.org/http://prl.aps.org/Physical Review Letters
220031-90070031-9007Physical review letters (Print)19589999http://prl.aps.org/Phys. rev. lett. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaN1.012Physical Review Lettershttp://prl.aps.org/http://prl.aps.org/Physical Review Letters
331932-62031932-6203PloS one20069999http://www.plosone.org/NaN124236PLoS ONE1.0Yes1.0PLoS One1.0NaN0.0NaN0.04.035714NaN53PLoS ONEhttp://www.plosone.org/http://www.plosone.org/PLoS ONE
442174-84542174-8454EU-topías20119999NaNEU-topías124, 138, 402, 292209NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0NaNNaN14, 5NaNNaNNaNEU-topías
.......................................................................................
13419980022-34680022-3468Journal of pediatric surgery (Print)19669999http://www.jpedsurg.orgJ. pediatr. surg. (Print)124236NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaNNaN175Journal of Pediatric Surgeryhttp://www.jpedsurg.org/http://www.jpedsurg.org/Journal of Pediatric Surgery
13429991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaNNaN18Probability Theory and Related Fieldshttp://www.springerlink.com/content/100451/?p=...http://www.springerlink.com/content/100451/?p=...Probability Theory and Related Fields
13439991432-20640178-8051Probability theory and related fields (Internet)uuuu9999http://www.springerlink.com/content/100451Probab. theory relat. fields (Internet)12483NaNNaNNaN0.0Probability Theory and Related Fields1.0preserved1.0Probability Theory and Related Fields1.0NaNNaN18Probability Theory and Related Fieldshttp://www.springerlink.com/content/100451/?p=...http://www.springerlink.com/content/100451/?p=...Probability Theory and Related Fields
134410000960-14810960-1481Renewable energy19919999NaNRenew. energy124234NaNNaNNaN0.0NaN0.0preserved1.0NaN0.0NaNNaN1119Renewable Energyhttp://www.elsevier.com/wps/product/cws_home/9...http://www.elsevier.com/wps/product/cws_home/9...Renewable Energy
134510010161-75670161-7567Journal of applied physiology: respiratory, en...19771984https://www.physiology.org/journal/japplJ. appl. physiol.: respir., environ. exercise ...124236NaNNaNNaN0.0NaN0.0NaN0.0NaN0.0NaNNaN1217NaNNaNhttps://www.physiology.org/journal/japplJournal of applied physiology: respiratory, en...
-

1346 rows × 28 columns

-
- - - - -```python -journals_export = journal[['id', 'title', 'name_short_iso_4', 'starting_year', 'end_year', 'url', 'country', 'language', 'oa_status', 'publisher', 'doaj_seal', 'doaj_status', 'lockss', 'portico', 'nlch', 'qoam_av_score']] -journals_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idtitlename_short_iso_4starting_yearend_yearurlcountrylanguageoa_statuspublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_score
01Revue médicale suisseRev. méd. suisse20059999NaN21513811NaN0.00.00.00.0NaN
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/23612412NaN0.00.01.00.0NaN
22Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/23612412NaN0.00.01.00.0NaN
33PLoS ONENaN20069999http://www.plosone.org/236124531.01.01.00.00.04.035714
44EU-topíasEU-topías20119999NaN209124, 138, 402, 29214, 5NaN0.00.00.00.0NaN
...................................................
1341998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/236124175NaN0.00.01.00.0NaN
1342999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...8312418NaN0.01.01.01.0NaN
1343999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...8312418NaN0.01.01.01.0NaN
13441000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...2341241119NaN0.00.01.00.0NaN
13451001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl2361241217NaN0.00.00.00.0NaN
-

1346 rows × 16 columns

-
- - - - -```python -# renommage des champs finaux -journals_export = journals_export.rename(columns={'title' : 'name', 'url' : 'website'}) -# remplacement des vides et id à int -journals_export['starting_year'] = journals_export['starting_year'].fillna(0) -journals_export['end_year'] = journals_export['end_year'].fillna(9999) -journals_export['name_short_iso_4'] = journals_export['name_short_iso_4'].fillna('') -journals_export['website'] = journals_export['website'].fillna('') -journals_export['doaj_seal'] = journals_export['doaj_seal'].fillna('0') -journals_export['country'] = journals_export['country'].fillna('999999') -journals_export['language'] = journals_export['language'].fillna('999999') -journals_export['doaj_status'] = journals_export['doaj_status'].astype(int) -journals_export['doaj_seal'] = journals_export['doaj_seal'].astype(int) -journals_export['lockss'] = journals_export['lockss'].astype(int) -journals_export['portico'] = journals_export['portico'].astype(int) -journals_export['nlch'] = journals_export['nlch'].astype(int) -journals_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_statuspublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_score
01Revue médicale suisseRev. méd. suisse200599992151381100000NaN
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/2361241200010NaN
22Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/2361241200010NaN
33PLoS ONE20069999http://www.plosone.org/23612453111004.035714
44EU-topíasEU-topías20119999209124, 138, 402, 29214, 500000NaN
...................................................
1341998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/23612417500010NaN
1342999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...831241800111NaN
1343999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...831241800111NaN
13441000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...234124111900010NaN
13451001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl236124121700000NaN
-

1346 rows × 16 columns

-
- - - - -```python -journals_export = journals_export.drop_duplicates(subset='id') -journals_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_statuspublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_score
01Revue médicale suisseRev. méd. suisse200599992151381100000NaN
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/2361241200010NaN
33PLoS ONE20069999http://www.plosone.org/23612453111004.035714
44EU-topíasEU-topías20119999209124, 138, 402, 29214, 500000NaN
55Physical review B: Condensed matter and materi...Phys. rev., B, Condens. matter mater. phys.19982015http://journals.aps.org/prb/2361241600010NaN
...................................................
1339997Smart Materials and StructuresSmart mater. struct. (Print)19929999http://iopscience.iop.org/0964-172623412414700010NaN
1341998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/23612417500010NaN
1342999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...831241800111NaN
13441000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...234124111900010NaN
13451001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl236124121700000NaN
-

911 rows × 16 columns

-
- - - - -```python -# test journaux sans titre -journals_export.loc[journals_export['name'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_statuspublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_score
-
- - - - -```python -# export et suppression des journaux sans titre -# export csv -journals_export.loc[journals_export['name'].isna()].to_csv('sample/sherpa_journals_without_title.tsv', sep='\t', encoding='utf-8', index=False) -# export excel -journals_export.loc[journals_export['name'].isna()].to_excel('sample/sherpa_journals_without_title.xlsx', index=False) -journals_export = journals_export.loc[journals_export['name'].notna()] -journals_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_statuspublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_score
01Revue médicale suisseRev. méd. suisse200599992151381100000NaN
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/2361241200010NaN
33PLoS ONE20069999http://www.plosone.org/23612453111004.035714
44EU-topíasEU-topías20119999209124, 138, 402, 29214, 500000NaN
55Physical review B: Condensed matter and materi...Phys. rev., B, Condens. matter mater. phys.19982015http://journals.aps.org/prb/2361241600010NaN
...................................................
1339997Smart Materials and StructuresSmart mater. struct. (Print)19929999http://iopscience.iop.org/0964-172623412414700010NaN
1341998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/23612417500010NaN
1342999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...831241800111NaN
13441000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...234124111900010NaN
13451001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl236124121700000NaN
-

911 rows × 16 columns

-
- - - - -```python -journals_export.loc[journals_export['name'].str.contains('(Print)')] -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\strings.py:1843: UserWarning: This pattern has match groups. To actually get the groups, use str.extract. - return func(self, *args, **kwargs) - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_statuspublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_score
8654Helvetica physica acta (Print)Helv. phys. acta (Print)19281999215124, 138, 15114100000NaN
239155Studies in health technology and informatics (...Stud. health technol. inform. (Print)1991999915612419000000NaN
441306Bioethica Forum (Basel. 2008. Print)Bioeth. Forum (Basel, 2008, Print)20089999215138, 124, 151114300000NaN
534373Schweizerische Ärztezeitung (Print)Schweiz. Ärzteztg. (Print)19529999215203, 151, 138117000000NaN
601430The European physical journal. B, Condensed ma...Eur. phys. j., B Cond. matter phys. (Print)19989999761241195, 43001111.25
650467Conference on Lasers and Electro-optics (Print)Conf. Lasers Electro-opt. (Print)20039999http://www.cleoconference.org/23612413900000NaN
850618Swiss archives of neurology, psychiatry and ps...Swiss arch. neurol. psychiatry psychother. (Pr...20169999215151, 124, 13862001000NaN
901660Journal der Deutschen Dermatologischen Gesells...20039999234151, 124128300010NaN
957702IEEE/LEOS International Conference on Optical ...IEEE/LEOS Int. Conf. Opt. MEMS Nanophotonics (...200720uuhttp://ieeexplore.ieee.org/xpl/conhome.jsp?pun...236124128000000NaN
1104814Forumpoenale (Print)Forumpoenale (Print)20089999215151, 203, 138120400000NaN
1182877Gesnerus (Print)Gesnerus (Print)19439999215124, 138, 151, 203114300000NaN
1336994GG@G (Print)GG@G (Print)20009999215124138000000NaN
-
- - - - -```python -journals_export.loc[journals_export['name'].str.contains('(Online)')] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_statuspublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_score
1257936Plastic and reconstructive surgery (Online)Plast. reconstr. surg. (Online)19639999http://gateway.ovid.com/ovidweb.cgi?T=JS&MODE=...236124136300000NaN
-
- - - - -```python -# remplacement des mentions " (Print)" et " (Online)" dans les titres -journals_export['name'] = journals_export['name'].str.replace('(Print)', '') -journals_export['name'] = journals_export['name'].str.replace('(Online)', '') -journals_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_statuspublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_score
01Revue médicale suisseRev. méd. suisse200599992151381100000NaN
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/2361241200010NaN
33PLoS ONE20069999http://www.plosone.org/23612453111004.035714
44EU-topíasEU-topías20119999209124, 138, 402, 29214, 500000NaN
55Physical review B: Condensed matter and materi...Phys. rev., B, Condens. matter mater. phys.19982015http://journals.aps.org/prb/2361241600010NaN
...................................................
1339997Smart Materials and StructuresSmart mater. struct. (Print)19929999http://iopscience.iop.org/0964-172623412414700010NaN
1341998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/23612417500010NaN
1342999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...831241800111NaN
13441000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...234124111900010NaN
13451001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl236124121700000NaN
-

911 rows × 16 columns

-
- - - - -```python -journals_export.loc[journals_export['name'].str.contains('(Print)')] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_statuspublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_score
-
- - - - -```python -journals_export.loc[journals_export['name'].str.contains('(Online)')] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_statuspublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_score
-
- - - -## Table sherpa_policies - - -```python -# creation du DF -col_names = ['journal', - 'issn', - 'sherpa_id', - 'sherpa_uri', - 'open_access_prohibited', - 'additional_oa_fee', - 'article_version', - 'license', - 'embargo', - 'prerequisites', - 'prerequisite_funders', - 'prerequisite_funders_name', - 'prerequisite_funders_fundref', - 'prerequisite_funders_ror', - 'prerequisite_funders_country', - 'prerequisite_funders_url', - 'prerequisite_funders_sherpa_id', - 'prerequisite_subjects', - 'location', - 'locations_ir', - 'locations_not_ir', - 'named_repository', - 'named_academic_social_network', - 'copyright_owner', - 'publisher_deposit', - 'archiving', - 'conditions', - 'public_notes' - ] -sherpa_policies = pd.DataFrame(columns = col_names) -sherpa_policies -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionlicenseembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notes
-
- - - - -```python -# dédoublonage par journal id -issn_dedup = issn.drop_duplicates(subset='journal') -issn_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeid
00001-28150001-2815532PRINT11
20001-48420001-4842498PRINT13
40001-49660001-4966789PRINT15
70001-62680001-6268166PRINT18
90001-63220001-6322807PRINT110
.....................
17512469-99502469-995041PRINT11752
17532470-00102470-001080PRINT11754
17552470-00452470-0045533OTHER31756
17572475-99532475-9953608ELECTRONIC21758
17582504-44272504-4427994PRINT11759
-

909 rows × 6 columns

-
- - - - -```python -# type de repositories qui provoquent archiving = 1 : -# tous les types : 'academic_social_network', 'any_repository', 'any_website', 'authors_homepage', -# 'funder_designated_location', 'institutional_repository', 'institutional_website', 'named_academic_social_network', -# 'named_repository', 'non_commercial_institutional_repository', 'non_commercial_repository', -# 'non_commercial_social_network', 'non_commercial_subject_repository', 'non_commercial_website', -# 'preprint_repository', 'subject_repository', 'this_journal' -repositories_archiving = ['any_repository', - 'institutional_repository', - 'institutional_website', - 'non_commercial_institutional_repository', - 'non_commercial_repository', - 'any_website', - 'non_commercial_website'] - -# extraction des termes -for index, row in issn_dedup.iterrows(): - journal_id = row['journal'] - journal_issn = row['issn'] - # boucle des fichiers json - # print(row['format']) - if (((index/10) - int(index/10)) == 0) : - print(index) - # test d'existance du fichier - if os.path.exists('sherpa/data/' + journal_issn + '.json'): - with open('sherpa/data/' + journal_issn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - # initialisation des variables à extraire - sherpa_id = np.nan - sherpa_uri = np.nan - open_access_prohibited = np.nan - location = np.nan - locations_ir = '' - locations_not_ir = '' - additional_oa_fee = np.nan - article_versions = np.nan - article_version = np.nan - licenses = [] - embargo = 0 - prerequisites = np.nan - prerequisite_funders = np.nan - prerequisite_funders_name = np.nan - prerequisite_funders_fundref = np.nan - prerequisite_funders_ror = np.nan - prerequisite_funders_country = np.nan - prerequisite_funders_url = np.nan - prerequisite_funders_sherpa_id = np.nan - prerequisite_subjects = np.nan - named_repository = np.nan - named_academic_social_network = np.nan - copyright_owner = np.nan - publisher_deposit = np.nan - archiving = np.nan - conditions = np.nan 
- public_notes = np.nan - if (len(data['items']) > 0): - if ('id' in data['items'][0]): - sherpa_id = data['items'][0]['id'] - # test si l'id est déjà présent - if sherpa_id in sherpa_policies['sherpa_id'] : - print('SKIP ' + str(sherpa_id)) - else : - poilicies = data['items'][0]['publisher_policy'] - for poilicy in poilicies: - # initialisation des variables à extraire - sherpa_uri = np.nan - open_access_prohibited = np.nan - if ('uri' in poilicy): - sherpa_uri = poilicy['uri'] - if ('open_access_prohibited' in poilicy): - open_access_prohibited = poilicy['open_access_prohibited'] - if ('permitted_oa' in poilicy): - poas = poilicy['permitted_oa'] - for poa in poas: - additional_oa_fee = np.nan - article_versions = np.nan - article_version = np.nan - licenses = [] - embargo = 0 - prerequisites = np.nan - prerequisite_funders = np.nan - prerequisite_funders_name = np.nan - prerequisite_funders_fundref = np.nan - prerequisite_funders_ror = np.nan - prerequisite_funders_country = np.nan - prerequisite_funders_url = np.nan - prerequisite_funders_sherpa_id = np.nan - prerequisite_subjects = np.nan - named_repository = np.nan - named_academic_social_network = np.nan - locations_ir = '' - locations_not_ir = '' - copyright_owner = np.nan - conditions = np.nan - public_notes = np.nan - if ('additional_oa_fee' in poa): - additional_oa_fee = poa['additional_oa_fee'] - if ('location' in poa): - archiving = 0 - location = '' - mylocations = poa['location']['location'] - mylocations_text = poa['location']['location_phrases'] - if (type(mylocations) is not list): - mylocations = [mylocations] - location = ' ; '.join(mylocations) - for locationi in mylocations: - if locationi in repositories_archiving : - archiving = archiving + 1 - for locationi_text in mylocations_text: - if locationi_text['value'] == locationi : - if locations_ir == '': - locations_ir = locations_ir + locationi_text['phrase'] - else : - if locationi_text['phrase'] not in locations_ir : - locations_ir = 
locations_ir + ' ; ' + locationi_text['phrase'] - else : - for locationi_text in mylocations_text: - if locationi_text['value'] == locationi : - if locations_not_ir == '': - locations_not_ir = locations_not_ir + locationi_text['phrase'] - else : - if locationi_text['phrase'] not in locations_not_ir : - locations_not_ir = locations_not_ir + ' ; ' + locationi_text['phrase'] - # print (archiving) - if archiving > 0: - archiving = True - else : - archiving = False - if ('named_repository' in poa['location']): - if (type(poa['location']['named_repository']) is list): - named_repository = ' ; '.join(poa['location']['named_repository']) - else : - named_repository = poa['location']['named_repository'] - locations_not_ir = locations_not_ir.replace('Named Repository', named_repository) - locations_ir = locations_ir.replace('Named Repository', named_repository) - if ('named_academic_social_network' in poa['location']): - if (type(poa['location']['named_academic_social_network']) is list): - named_academic_social_network = ' ; '.join(poa['location']['named_academic_social_network']) - else : - named_academic_social_network = poa['location']['named_academic_social_network'] - locations_not_ir = locations_not_ir.replace('Named Academic Social Network', named_academic_social_network) - locations_ir = locations_ir.replace('Named Academic Social Network', named_academic_social_network) - if ('embargo' in poa): - # print(poa['embargo']) - embargo_amount = 0 - if ('amount' in poa['embargo']): - embargo_amount = poa['embargo']['amount'] - if ('units' in poa['embargo']): - if (poa['embargo']['units'] == 'months') : - embargo = embargo_amount - elif (poa['embargo']['units'] == 'years') : - embargo = embargo_amount*12 - elif (poa['embargo']['units'] == 'weeks') : - embargo = int(embargo_amount/4) - if (embargo == 0): - embargo = 1 - elif (poa['embargo']['units'] == 'days') : - embargo = int(embargo_amount/30) - if (embargo == 0): - embargo = 1 - else : - embargo = embargo_amount - if 
('prerequisites' in poa): - if 'prerequisites' in poa['prerequisites'] : - if (type(poa['prerequisites']['prerequisites']) is list): - prerequisites = ' ; '.join(poa['prerequisites']['prerequisites']) - else: - prerequisites = poa['prerequisites']['prerequisites'] - if ('prerequisite_funders' in poa['prerequisites']): - prerequisite_funders = True - # prerequisite_funders = poa['prerequisites']['prerequisite_funders'] - # if (type(poa['prerequisites']['prerequisite_funders']) is list): - # prerequisite_funders = ' ; '.join(poa['prerequisites']['prerequisite_funders']) - # else: - # prerequisite_funders = poa['prerequisites']['prerequisite_funders'] - if ('prerequisite_subjects' in poa['prerequisites']): - prerequisite_subjects = True - # prerequisite_subjects = poa['prerequisites']['prerequisite_subjects'] - # if (type(poa['prerequisite_subjects']) is list): - # prerequisite_subjects = ' ; '.join(poa['prerequisite_subjects']) - # else: - # prerequisite_subjects = poa['prerequisite_subjects'] - if ('copyright_owner' in poa): - copyright_owner = poa['copyright_owner'] - if ('publisher_deposit' in poa): - publisher_deposit = '' - if (type(poa['publisher_deposit']) is list): - for deposit in poa['publisher_deposit']: - if 'type' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + deposit['repository_metadata']['type'] - if 'name' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + ' (' + deposit['repository_metadata']['name'][0]['name'] + ')' - else : - if 'name' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + deposit['repository_metadata']['name'][0]['name'] - publisher_deposit = publisher_deposit + ' ; ' - else : - deposit = poa['publisher_deposit'] - if 'type' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + deposit['repository_metadata']['type'] - if 'name' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + ' (' + 
deposit['repository_metadata']['name'][0]['name'] + ')' - else : - if 'name' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + deposit['repository_metadata']['name'][0]['name'] - publisher_deposit = publisher_deposit + ' ; ' - # print (publisher_deposit) - if ('conditions' in poa): - if (type(poa['conditions']) is list): - conditions = ' ; '.join(poa['conditions']) - else: - conditions = poa['conditions'] - if ('public_notes' in poa): - if (type(poa['public_notes']) is list): - public_notes = ' ; '.join(poa['public_notes']) - else: - public_notes = poa['public_notes'] - if ('license' in poa): - licenses = poa['license'] - if (type(licenses) is not list): - licenses = [licenses] - else : - licenses = [''] - # avec article version - if ('article_version' in poa): - article_versions = poa['article_version'] - for article_version in article_versions: - for license in licenses: - if ('license' in license): - mylicense = license['license'] - else : - mylicense = '' - # avec prerequisites - if ('prerequisites' in poa) : - # avec prerequisites_funders - if ('prerequisite_funders' in poa['prerequisites']): - for prerequisite_fundersi in poa['prerequisites']['prerequisite_funders'] : - prerequisite_funders_name = prerequisite_fundersi['funder_metadata']['name'][0]['name'] - if 'acronym' in prerequisite_fundersi['funder_metadata']['name'][0]: - prerequisite_funders_name = prerequisite_funders_name + ' (' + prerequisite_fundersi['funder_metadata']['name'][0]['acronym'] + ')' - if 'identifiers' in prerequisite_fundersi['funder_metadata'] : - for fund_identifier in prerequisite_fundersi['funder_metadata']['identifiers'] : - if fund_identifier['type'] == 'fundref': - prerequisite_funders_fundref = fund_identifier['identifier'] - if fund_identifier['type'] == 'ror': - prerequisite_funders_ror = fund_identifier['identifier'] - if 'country' in prerequisite_fundersi['funder_metadata']: - prerequisite_funders_country = 
prerequisite_fundersi['funder_metadata']['country'] - if 'url' in prerequisite_fundersi['funder_metadata']: - prerequisite_funders_url = prerequisite_fundersi['funder_metadata']['url'][0]['url'] - prerequisite_funders_sherpa_id = prerequisite_fundersi['funder_metadata']['id'] - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - # sans prerequisites_funders - else : - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 
'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - # sans prerequisites - else : - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : 
archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - - # sans article version - else : - if (type(licenses) is not list): - licenses = [licenses] - for license in licenses: - if ('license' in license): - mylicense = license['license'] - else : - mylicense = '' - # avec prerequisites - if ('prerequisites' in poa) : - # avec prerequisites_funders - if ('prerequisite_funders' in poa['prerequisites']): - for prerequisite_fundersi in poa['prerequisites']['prerequisite_funders'] : - prerequisite_funders_name = prerequisite_fundersi['funder_metadata']['name'][0]['name'] - if 'acronym' in prerequisite_fundersi['funder_metadata']['name'][0]: - prerequisite_funders_name = prerequisite_funders_name + ' (' + prerequisite_fundersi['funder_metadata']['name'][0]['acronym'] + ')' - if 'identifiers' in prerequisite_fundersi['funder_metadata'] : - for fund_identifier in prerequisite_fundersi['funder_metadata']['identifiers'] : - if fund_identifier['type'] == 'fundref': - prerequisite_funders_fundref = fund_identifier['identifier'] - if fund_identifier['type'] == 'ror': - prerequisite_funders_ror = fund_identifier['identifier'] - if 'country' in prerequisite_fundersi['funder_metadata']: - prerequisite_funders_country = prerequisite_fundersi['funder_metadata']['country'] - if 'url' in prerequisite_fundersi['funder_metadata']: - prerequisite_funders_url = prerequisite_fundersi['funder_metadata']['url'][0]['url'] - prerequisite_funders_sherpa_id = prerequisite_fundersi['funder_metadata']['id'] - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : 
prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - # sans prerequisites_funders - else : - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 
'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - # sans prerequisites - else : - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - # sans permitted_oa - else : - print ('permitted_oa MISSING') - else : - print ('id MISSING') -``` - - 0 - 20 - 40 - 50 - 60 - SKIP 321 - 110 - SKIP 475 - SKIP 476 - 180 - 220 - 250 - 260 - 290 - 300 - 330 - 340 - 360 - 370 - 380 - 420 - permitted_oa MISSING - 430 - permitted_oa MISSING - SKIP 1319 - SKIP 880 - permitted_oa MISSING - 510 - permitted_oa MISSING - 530 - 540 - 550 - 560 - SKIP 1342 - 570 - 590 - SKIP 3082 - SKIP 2465 - SKIP 1682 - SKIP 325 - SKIP 3179 - 670 - 680 - SKIP 1641 - SKIP 1202 - 720 - SKIP 3995 - 730 - SKIP 3475 - SKIP 3490 - 740 - 750 - 760 - SKIP 1383 - SKIP 1357 - permitted_oa MISSING - 830 - 840 - SKIP 1868 
- 850 - SKIP 883 - 880 - 890 - SKIP 1392 - 900 - 910 - SKIP 1377 - 920 - SKIP 3443 - 930 - 940 - SKIP 1123 - SKIP 3581 - SKIP 3558 - SKIP 745 - 980 - 990 - SKIP 11 - SKIP 2499 - 1000 - SKIP 42 - 1010 - 1020 - SKIP 314 - 1030 - 1040 - SKIP 1380 - SKIP 229 - SKIP 1518 - SKIP 5682 - SKIP 4708 - SKIP 1661 - 1130 - SKIP 6585 - 1140 - SKIP 3212 - 1150 - SKIP 335 - SKIP 6774 - 1160 - SKIP 6590 - 1180 - SKIP 1639 - SKIP 5094 - SKIP 1254 - 1200 - SKIP 6325 - SKIP 3539 - SKIP 1444 - SKIP 250 - SKIP 1543 - SKIP 3415 - SKIP 3571 - SKIP 3474 - SKIP 3586 - SKIP 3220 - SKIP 3837 - SKIP 1650 - SKIP 1051 - SKIP 3572 - SKIP 612 - SKIP 6587 - SKIP 3567 - SKIP 1654 - SKIP 4070 - SKIP 1643 - SKIP 6588 - SKIP 1657 - SKIP 1687 - SKIP 1692 - SKIP 1341 - 1320 - SKIP 7150 - SKIP 876 - 1330 - SKIP 7007 - SKIP 7091 - 1340 - 1350 - SKIP 173 - SKIP 4703 - 1360 - SKIP 2515 - 1370 - SKIP 242 - SKIP 3930 - SKIP 2004 - 1400 - 1410 - SKIP 2123 - SKIP 1320 - SKIP 1459 - SKIP 1588 - SKIP 7678 - SKIP 1391 - SKIP 878 - SKIP 138 - SKIP 7632 - SKIP 1644 - SKIP 1637 - SKIP 2207 - SKIP 2428 - SKIP 2432 - 1460 - SKIP 2477 - SKIP 2430 - SKIP 1653 - SKIP 2397 - SKIP 5935 - SKIP 3527 - SKIP 148 - SKIP 7793 - SKIP 4005 - SKIP 7768 - SKIP 3455 - SKIP 1652 - SKIP 3570 - SKIP 7792 - SKIP 3533 - SKIP 6586 - 1520 - SKIP 7787 - SKIP 3355 - 1530 - SKIP 226 - SKIP 1655 - SKIP 7783 - 1540 - SKIP 6582 - 1550 - SKIP 7762 - SKIP 4691 - SKIP 1911 - SKIP 1447 - SKIP 1778 - SKIP 1888 - SKIP 228 - SKIP 7407 - SKIP 7965 - 1590 - 1600 - 1610 - SKIP 821 - SKIP 823 - SKIP 7714 - 1620 - SKIP 172 - SKIP 2624 - SKIP 3654 - SKIP 1659 - SKIP 1656 - SKIP 1658 - SKIP 1393 - 1640 - SKIP 6778 - SKIP 8220 - SKIP 7872 - SKIP 1587 - SKIP 822 - SKIP 1460 - SKIP 6581 - SKIP 3568 - 1670 - SKIP 7509 - SKIP 7799 - SKIP 7765 - 1680 - SKIP 7761 - SKIP 7800 - 1690 - SKIP 1244 - 1710 - SKIP 6222 - 1730 - 1740 - 1750 - - - -```python -sherpa_policies -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionlicenseembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notes
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmitted0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoaccepted12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmitted0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...ChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN
.......................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmitted0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoaccepted0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublished0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN
-

8595 rows × 28 columns

-
- - - - -```python -# convertir l'index en id -sherpa_policies = sherpa_policies.reset_index() -# ajout de l'id avec l'index + 1 -sherpa_policies['id'] = sherpa_policies['index'] + 1 -del sherpa_policies['index'] -sherpa_policies -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionlicenseembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesid
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmitted0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN1
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoaccepted12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN2
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN3
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN4
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmitted0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...ChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN5
..........................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmitted0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN8591
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoaccepted0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN8592
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublished0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN8593
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN8594
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN8595
-

8595 rows × 29 columns

-
- - - - -```python -# export csv -sherpa_policies.to_csv('sample/sherpa_policies_brut.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -sherpa_policies.to_excel('sample/sherpa_policies_brut.xlsx', index=False) -``` - -## Calcul de la catégorie "green" et export final des journaux - - -```python -sherpa_policies -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionlicenseembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesid
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmitted0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN1
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoaccepted12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN2
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN3
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN4
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmitted0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...ChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN5
..........................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmitted0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN8591
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoaccepted0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN8592
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublished0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN8593
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN8594
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN8595
-

8595 rows × 29 columns

-
- - - - -```python -sherpa_policies_ir = sherpa_policies.loc[(sherpa_policies['archiving'] == True) & (sherpa_policies['article_version'] == 'published') & (sherpa_policies['prerequisite_funders'].isna())][['journal', 'embargo', 'license', 'conditions']] -sherpa_policies_ir -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalembargolicenseconditions
25320cc_byPublished source must be acknowledged
35320cc_by_nc_ndPublished source must be acknowledged
949812cc_byNaN
1049812cc_by_nc_ndNaN
1149812bespoke_licenseNaN
...............
85885330cc_byNaN
85895330cc_byNaN
85926080Must link to published article ; Publisher cop...
85936080cc_byNaN
85946080cc_byNaN
-

1118 rows × 4 columns

-
- - - - -```python -# dedup -sherpa_policies_ir_id = sherpa_policies_ir[['journal', 'embargo']].sort_values(by=['journal', 'embargo']) -sherpa_policies_ir_dedup = sherpa_policies_ir_id.drop_duplicates(subset='journal') -sherpa_policies_ir_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalembargo
236720
834230
736650
261612
708670
.........
64799960
68739970
18239980
39449990
675010000
-

579 rows × 2 columns

-
- - - - -```python -# ajout de la ctégorie green (2) -sherpa_policies_ir_dedup['oa_status'] = 2 -sherpa_policies_ir_dedup -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalembargooa_status
2367202
8342302
7366502
2616122
7086702
............
647999602
687399702
182399802
394499902
6750100002
-

579 rows × 3 columns

-
- - - - -```python -# merge avec les revues -sherpa_policies_ir_dedup = sherpa_policies_ir_dedup.rename(columns={'journal' : 'id'}) -journals_export = pd.merge(journals_export, sherpa_policies_ir_dedup, on='id', how='left') -journals_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_status_xpublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_scoreembargooa_status_y
01Revue médicale suisseRev. méd. suisse200599992151381100000NaNNaNNaN
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/2361241200010NaN02.0
23PLoS ONE20069999http://www.plosone.org/23612453111004.03571402.0
34EU-topíasEU-topías20119999209124, 138, 402, 29214, 500000NaNNaNNaN
45Physical review B: Condensed matter and materi...Phys. rev., B, Condens. matter mater. phys.19982015http://journals.aps.org/prb/2361241600010NaN02.0
.........................................................
906997Smart Materials and StructuresSmart mater. struct. (Print)19929999http://iopscience.iop.org/0964-172623412414700010NaN02.0
907998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/23612417500010NaN02.0
908999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...831241800111NaN02.0
9091000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...234124111900010NaN02.0
9101001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl236124121700000NaNNaNNaN
-

911 rows × 18 columns

-
- - - - -```python -# choix de la catégorie OA -journals_export['oa_status'] = journals_export['oa_status_x'] -journals_export.loc[(journals_export['oa_status_x'] == 1) & (journals_export['oa_status_y'].notna()), 'oa_status'] = journals_export['oa_status_y'] -journals_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguageoa_status_xpublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_scoreembargooa_status_yoa_status
01Revue médicale suisseRev. méd. suisse200599992151381100000NaNNaNNaN1.0
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/2361241200010NaN02.02.0
23PLoS ONE20069999http://www.plosone.org/23612453111004.03571402.05.0
34EU-topíasEU-topías20119999209124, 138, 402, 29214, 500000NaNNaNNaN1.0
45Physical review B: Condensed matter and materi...Phys. rev., B, Condens. matter mater. phys.19982015http://journals.aps.org/prb/2361241600010NaN02.02.0
............................................................
906997Smart Materials and StructuresSmart mater. struct. (Print)19929999http://iopscience.iop.org/0964-172623412414700010NaN02.02.0
907998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/23612417500010NaN02.02.0
908999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...831241800111NaN02.02.0
9091000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...234124111900010NaN02.02.0
9101001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl236124121700000NaNNaNNaN1.0
-

911 rows × 19 columns

-
- - - - -```python -# 6 : Diamond -# 5 : Gold -# 4 : Full -# 3 : Hybrid -# 2 : Green -# 1 : UNKNOWN -journals_export['oa_status'].value_counts() -``` - - - - - 2.0 518 - 1.0 306 - 5.0 70 - 6.0 17 - Name: oa_status, dtype: int64 - - - - -```python -del journals_export['embargo'] -del journals_export['oa_status_x'] -del journals_export['oa_status_y'] -journals_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguagepublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_scoreoa_status
01Revue médicale suisseRev. méd. suisse20059999215138100000NaN1.0
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/236124200010NaN2.0
23PLoS ONE20069999http://www.plosone.org/2361243111004.0357145.0
34EU-topíasEU-topías20119999209124, 138, 402, 2924, 500000NaN1.0
45Physical review B: Condensed matter and materi...Phys. rev., B, Condens. matter mater. phys.19982015http://journals.aps.org/prb/236124600010NaN2.0
...................................................
906997Smart Materials and StructuresSmart mater. struct. (Print)19929999http://iopscience.iop.org/0964-17262341244700010NaN2.0
907998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/2361247500010NaN2.0
908999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...83124800111NaN2.0
9091000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...23412411900010NaN2.0
9101001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl23612421700000NaN1.0
-

911 rows × 16 columns

-
- - - - -```python -journals_export['oa_status'] = journals_export['oa_status'].astype(int) -journals_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguagepublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_scoreoa_status
01Revue médicale suisseRev. méd. suisse20059999215138100000NaN1
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/236124200010NaN2
23PLoS ONE20069999http://www.plosone.org/2361243111004.0357145
34EU-topíasEU-topías20119999209124, 138, 402, 2924, 500000NaN1
45Physical review B: Condensed matter and materi...Phys. rev., B, Condens. matter mater. phys.19982015http://journals.aps.org/prb/236124600010NaN2
...................................................
906997Smart Materials and StructuresSmart mater. struct. (Print)19929999http://iopscience.iop.org/0964-17262341244700010NaN2
907998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/2361247500010NaN2
908999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...83124800111NaN2
9091000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...23412411900010NaN2
9101001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl23612421700000NaN1
-

911 rows × 16 columns

-
- - - - -```python -# export csv -journals_export.to_csv('sample/journal_fin_sherpa.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -journals_export.to_excel('sample/journal_fin_sherpa.xlsx', index=False) -``` - - -```python -# export csv -sherpa_policies_ir_dedup.to_csv('sample/journal_ir.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -sherpa_policies_ir_dedup.to_excel('sample/journal_ir.xlsx', index=False) -``` - - -```python - -``` diff --git a/import_scripts/06_oacct_sherpa.py b/import_scripts/06_oacct_sherpa.py deleted file mode 100644 index 158819fd..00000000 --- a/import_scripts/06_oacct_sherpa.py +++ /dev/null @@ -1,1107 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet d'extraire les données de Sherpa/Romeo obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. 
-# -# Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -# Date de dernière mise à jour : 16.07.2021 - -# ## Données de Sherpa/Romeo -# -# ### Exemple -# -# https://v2.sherpa.ac.uk/cgi/retrieve_by_id?item-type=publication&api-key=EEE6F146-678E-11EB-9C3A-202F3DE2659A&format=Json&identifier=17601 - -# In[1]: - - -import pandas as pd -import csv -import json -import numpy as np -import os -# afficher toutes les colonnes -pd.set_option('display.max_columns', None) - - -# ## Table publisher_sherpa - -# In[2]: - - -# creation du DF -col_names = ['journal', - 'publisher_id', - 'name', - 'country', - 'type', - 'url' - ] -publisher_sherpa = pd.DataFrame(columns = col_names) -publisher_sherpa - - -# ## Table sherpa match issn - -# In[3]: - - -# creation du DF -col_names = ['issn', - 'sherpa_match', - ] -sherpa_match_issn = pd.DataFrame(columns = col_names) -sherpa_match_issn - - -# ## Table sherpa issns - -# In[4]: - - -# creation du DF -col_names = ['issn', - 'type', - ] -sherpa_issn = pd.DataFrame(columns = col_names) -sherpa_issn - - -# ## Table sherpa journals - -# In[5]: - - -# creation du DF -col_names = ['journal', - 'title', - 'url', - ] -sherpa_journal = pd.DataFrame(columns = col_names) -sherpa_journal - - -# ## Import table Journals et ISSN - -# In[6]: - - -journal = pd.read_csv('sample/journals_publishers_brut.tsv', encoding='utf-8', header=0, sep='\t') -journal - - -# In[7]: - - -issn = pd.read_csv('sample/issn_brut.tsv', encoding='utf-8', header=0, sep='\t') -issn - - -# In[8]: - - -issn_ids = pd.read_csv('sample/issn_ids.tsv', encoding='utf-8', header=0, sep='\t') -issn_ids - - -# ## Extraction de Sherpa Romeo - -# In[9]: - - -# extraction des informations à partir des données Sherpa/Romeo -for index, row in issn.iterrows(): - journal_id = row['journal'] - journal_issn = row['issn'] - # if (((index/10) - int(index/10)) == 0) : - # print(index) - # initialisation des variables à extraire - publisher_id = np.nan - publisher_name = '' - 
publisher_country = '' - publisher_type = '' - publisher_url = '' - # boucle des fichiers json - # test d'existance du fichier - # print(row['issn']) - if os.path.exists('sherpa/data/' + journal_issn + '.json'): - with open('sherpa/data/' + journal_issn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - if (len(data['items']) > 0): - publisher_id = data['items'][0]['publishers'][0]['publisher']['id'] - if ('country' in data['items'][0]['publishers'][0]['publisher']): - publisher_country = data['items'][0]['publishers'][0]['publisher']['country'] - if ('relationship_type' in data['items'][0]['publishers'][0]): - publisher_type = data['items'][0]['publishers'][0]['relationship_type'] - if ('url' in data['items'][0]['publishers'][0]['publisher']): - publisher_url = data['items'][0]['publishers'][0]['publisher']['url'] - if ('name' in data['items'][0]['publishers'][0]['publisher']['name'][0]): - publisher_name = data['items'][0]['publishers'][0]['publisher']['name'][0]['name'] - sherpa_match = 'OK' - publisher_sherpa = publisher_sherpa.append({'journal' : journal_id, 'publisher_id' : publisher_id, - 'name' : publisher_name, 'country' : publisher_country, - 'type' : publisher_type, 'url' : publisher_url}, ignore_index=True) - else : - print(row['issn'] + ' - trouvé mais vide') - sherpa_match = 'empty' - else : - print(row['issn'] + ' - pas trouvé') - sherpa_match = 'missing' - sherpa_match_issn = sherpa_match_issn.append({'issn' : row['issn'], 'sherpa_match' : sherpa_match}, ignore_index=True) - - -# In[10]: - - -publisher_sherpa - - -# In[11]: - - -sherpa_match_issn - - -# In[12]: - - -# dedup -publisher_sherpa_dedup = publisher_sherpa.drop_duplicates() -publisher_sherpa_dedup - - -# In[13]: - - -sherpa_match_issn - - -# In[14]: - - -# ajout du issnl et du titre -sherpa_match_issn = pd.merge(sherpa_match_issn, issn_ids, on='issn', how='left') -sherpa_match_issn = pd.merge(sherpa_match_issn, journal[['issnl', 'title']], on='issnl', how='left') 
-sherpa_match_issn - - -# In[15]: - - -sherpa_match_results = sherpa_match_issn[['id', 'issnl', 'sherpa_match']].groupby(['issnl', 'sherpa_match']).count() -sherpa_match_results - - -# In[16]: - - -sherpa_match_results = sherpa_match_results.reset_index() -sherpa_match_results - - -# In[17]: - - -sherpa_match_results_ok = sherpa_match_results.loc[sherpa_match_results['sherpa_match'] == 'OK'] -issn_ids_issnl = issn_ids[['issnl', 'journal']].drop_duplicates(subset='issnl') -issn_ids_issnl = pd.merge(issn_ids_issnl, sherpa_match_results_ok, on='issnl', how='left') -issn_ids_issnl = pd.merge(issn_ids_issnl, journal[['issnl', 'title']], on='issnl', how='left') -issn_ids_issnl - - -# In[18]: - - -journals_not_sherpa = issn_ids_issnl.loc[issn_ids_issnl['sherpa_match'].isna()] -journals_not_sherpa - - -# In[19]: - - -sherpa_match_results_empty = sherpa_match_results.loc[sherpa_match_results['sherpa_match'] == 'empty'] -sherpa_match_results_missing = sherpa_match_results.loc[sherpa_match_results['sherpa_match'] == 'missing'] -del journals_not_sherpa['sherpa_match'] -del journals_not_sherpa['id'] -journals_not_sherpa = pd.merge(journals_not_sherpa, sherpa_match_results_empty, on='issnl', how='left') -del journals_not_sherpa['id'] -journals_not_sherpa = pd.merge(journals_not_sherpa, sherpa_match_results_missing, on='issnl', how='left') -del journals_not_sherpa['id'] -journals_not_sherpa - - -# In[20]: - - -# extraction des informations des journaux à partir des données Sherpa/Romeo -for index, row in issn.iterrows(): - journal_id = row['journal'] - journal_issn = row['issn'] - # boucle des fichiers json - # test d'existance du fichier - # print(row['format']) - if (((index/10) - int(index/10)) == 0) : - print(index) - if os.path.exists('sherpa/data/' + journal_issn + '.json'): - with open('sherpa/data/' + journal_issn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - title = np.nan - url = np.nan - if (len(data['items']) > 0): - if ('url' in data['items'][0]): - 
url = data['items'][0]['url'] - if ('title' in data['items'][0]['title'][0]): - title = data['items'][0]['title'][0]['title'] - sherpa_journal = sherpa_journal.append({'journal' : journal_id, 'title' : title, 'url' : url}, ignore_index=True) - - -# In[21]: - - -sherpa_journal - - -# In[22]: - - -# extraction des informations à partir des données Sherpa/Romeo -for index, row in issn.iterrows(): - journal_id = row['journal'] - journal_issn = row['issn'] - # boucle des fichiers json - # test d'existance du fichier - # print(row['format']) - if (((index/10) - int(index/10)) == 0) : - print(index) - if os.path.exists('sherpa/data/' + journal_issn + '.json'): - with open('sherpa/data/' + journal_issn + '.json', 'r', encoding='utf-8') as f: - myissn = np.nan - mytype = np.nan - data = json.load(f) - if (len(data['items']) > 0): - if ('issns' in data['items'][0]): - issns = data['items'][0]['issns'] - for i in issns: - if ('issn' in i): - myissn = i['issn'] - if ('type' in i): - mytype = i['type'] - sherpa_issn = sherpa_issn.append({'issn' : myissn, 'type' : mytype}, ignore_index=True) - - -# In[23]: - - -sherpa_issn - - -# In[24]: - - -# dedup -sherpa_issn = sherpa_issn.drop_duplicates() -sherpa_issn - - -# In[25]: - - -# completer le fichier des issns avec les types de sherpa -issn2 = pd.merge(issn, sherpa_issn, on='issn', how='left') -issn2 - - -# In[26]: - - -# exports csv -publisher_sherpa_dedup.to_csv('sample/publisher_sherpa.tsv', sep='\t', encoding='utf-8', index=False) -sherpa_match_issn.to_csv('sample/sherpa_match_issn.tsv', sep='\t', encoding='utf-8', index=False) -sherpa_journal.to_csv('sample/sherpa_journal.tsv', sep='\t', encoding='utf-8', index=False) -issn2.to_csv('sample/issn_sherpa.tsv', sep='\t', encoding='utf-8', index=False) -journals_not_sherpa.to_csv('sample/journals_not_sherpa.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[27]: - - -# exports excel -publisher_sherpa_dedup.to_excel('sample/publisher_sherpa.xlsx', index=False) 
-sherpa_match_issn.to_excel('sample/sherpa_match_issn.xlsx', index=False) -sherpa_journal.to_excel('sample/sherpa_journal.xlsx', index=False) -issn2.to_excel('sample/issn_sherpa.xlsx', index=False) -journals_not_sherpa.to_excel('sample/journals_not_sherpa.xlsx', index=False) - - -# In[28]: - - -# ajout des titres Sherpa a la table des revues -# renommer les colonnes -sherpa_journal = sherpa_journal.rename(columns={'journal' : 'id'}) -journal = pd.merge(journal, sherpa_journal, on='id', how='left') -journal - - -# In[29]: - - -# choix du titre et url -journal['url'] = journal['url_y'] -journal.loc[journal['url_y'].isna(), 'url'] = journal['url_x'] -journal['title'] = journal['title_y'] -journal.loc[journal['title_y'].isna(), 'title'] = journal['title_x'] -journal - - -# In[30]: - - -journals_export = journal[['id', 'title', 'name_short_iso_4', 'starting_year', 'end_year', 'url', 'country', 'language', 'oa_status', 'publisher', 'doaj_seal', 'doaj_status', 'lockss', 'portico', 'nlch', 'qoam_av_score']] -journals_export - - -# In[31]: - - -# renommage des champs finaux -journals_export = journals_export.rename(columns={'title' : 'name', 'url' : 'website'}) -# remplacement des vides et id à int -journals_export['starting_year'] = journals_export['starting_year'].fillna(0) -journals_export['end_year'] = journals_export['end_year'].fillna(9999) -journals_export['name_short_iso_4'] = journals_export['name_short_iso_4'].fillna('') -journals_export['website'] = journals_export['website'].fillna('') -journals_export['doaj_seal'] = journals_export['doaj_seal'].fillna('0') -journals_export['country'] = journals_export['country'].fillna('999999') -journals_export['language'] = journals_export['language'].fillna('999999') -journals_export['doaj_status'] = journals_export['doaj_status'].astype(int) -journals_export['doaj_seal'] = journals_export['doaj_seal'].astype(int) -journals_export['lockss'] = journals_export['lockss'].astype(int) -journals_export['portico'] = 
journals_export['portico'].astype(int) -journals_export['nlch'] = journals_export['nlch'].astype(int) -journals_export - - -# In[32]: - - -journals_export = journals_export.drop_duplicates(subset='id') -journals_export - - -# In[33]: - - -# test journaux sans titre -journals_export.loc[journals_export['name'].isna()] - - -# In[34]: - - -# export et suppression des journaux sans titre -# export csv -journals_export.loc[journals_export['name'].isna()].to_csv('sample/sherpa_journals_without_title.tsv', sep='\t', encoding='utf-8', index=False) -# export excel -journals_export.loc[journals_export['name'].isna()].to_excel('sample/sherpa_journals_without_title.xlsx', index=False) -journals_export = journals_export.loc[journals_export['name'].notna()] -journals_export - - -# In[35]: - - -journals_export.loc[journals_export['name'].str.contains('(Print)')] - - -# In[36]: - - -journals_export.loc[journals_export['name'].str.contains('(Online)')] - - -# In[37]: - - -# remplacement des mentions " (Print)" et " (Online)" dans les titres -journals_export['name'] = journals_export['name'].str.replace('(Print)', '') -journals_export['name'] = journals_export['name'].str.replace('(Online)', '') -journals_export - - -# In[38]: - - -journals_export.loc[journals_export['name'].str.contains('(Print)')] - - -# In[39]: - - -journals_export.loc[journals_export['name'].str.contains('(Online)')] - - -# ## Table sherpa_policies - -# In[40]: - - -# creation du DF -col_names = ['journal', - 'issn', - 'sherpa_id', - 'sherpa_uri', - 'open_access_prohibited', - 'additional_oa_fee', - 'article_version', - 'license', - 'embargo', - 'prerequisites', - 'prerequisite_funders', - 'prerequisite_funders_name', - 'prerequisite_funders_fundref', - 'prerequisite_funders_ror', - 'prerequisite_funders_country', - 'prerequisite_funders_url', - 'prerequisite_funders_sherpa_id', - 'prerequisite_subjects', - 'location', - 'locations_ir', - 'locations_not_ir', - 'named_repository', - 
'named_academic_social_network', - 'copyright_owner', - 'publisher_deposit', - 'archiving', - 'conditions', - 'public_notes' - ] -sherpa_policies = pd.DataFrame(columns = col_names) -sherpa_policies - - -# In[41]: - - -# dédoublonage par journal id -issn_dedup = issn.drop_duplicates(subset='journal') -issn_dedup - - -# In[42]: - - -# type de repositories qui provoquent archiving = 1 : -# tous les types : 'academic_social_network', 'any_repository', 'any_website', 'authors_homepage', -# 'funder_designated_location', 'institutional_repository', 'institutional_website', 'named_academic_social_network', -# 'named_repository', 'non_commercial_institutional_repository', 'non_commercial_repository', -# 'non_commercial_social_network', 'non_commercial_subject_repository', 'non_commercial_website', -# 'preprint_repository', 'subject_repository', 'this_journal' -repositories_archiving = ['any_repository', - 'institutional_repository', - 'institutional_website', - 'non_commercial_institutional_repository', - 'non_commercial_repository', - 'any_website', - 'non_commercial_website'] - -# extraction des termes -for index, row in issn_dedup.iterrows(): - journal_id = row['journal'] - journal_issn = row['issn'] - # boucle des fichiers json - # print(row['format']) - if (((index/10) - int(index/10)) == 0) : - print(index) - # test d'existance du fichier - if os.path.exists('sherpa/data/' + journal_issn + '.json'): - with open('sherpa/data/' + journal_issn + '.json', 'r', encoding='utf-8') as f: - data = json.load(f) - # initialisation des variables à extraire - sherpa_id = np.nan - sherpa_uri = np.nan - open_access_prohibited = np.nan - location = np.nan - locations_ir = '' - locations_not_ir = '' - additional_oa_fee = np.nan - article_versions = np.nan - article_version = np.nan - licenses = [] - embargo = 0 - prerequisites = np.nan - prerequisite_funders = np.nan - prerequisite_funders_name = np.nan - prerequisite_funders_fundref = np.nan - prerequisite_funders_ror = np.nan - 
prerequisite_funders_country = np.nan - prerequisite_funders_url = np.nan - prerequisite_funders_sherpa_id = np.nan - prerequisite_subjects = np.nan - named_repository = np.nan - named_academic_social_network = np.nan - copyright_owner = np.nan - publisher_deposit = np.nan - archiving = np.nan - conditions = np.nan - public_notes = np.nan - if (len(data['items']) > 0): - if ('id' in data['items'][0]): - sherpa_id = data['items'][0]['id'] - # test si l'id est déjà présent - if sherpa_id in sherpa_policies['sherpa_id'] : - print('SKIP ' + str(sherpa_id)) - else : - poilicies = data['items'][0]['publisher_policy'] - for poilicy in poilicies: - # initialisation des variables à extraire - sherpa_uri = np.nan - open_access_prohibited = np.nan - if ('uri' in poilicy): - sherpa_uri = poilicy['uri'] - if ('open_access_prohibited' in poilicy): - open_access_prohibited = poilicy['open_access_prohibited'] - if ('permitted_oa' in poilicy): - poas = poilicy['permitted_oa'] - for poa in poas: - additional_oa_fee = np.nan - article_versions = np.nan - article_version = np.nan - licenses = [] - embargo = 0 - prerequisites = np.nan - prerequisite_funders = np.nan - prerequisite_funders_name = np.nan - prerequisite_funders_fundref = np.nan - prerequisite_funders_ror = np.nan - prerequisite_funders_country = np.nan - prerequisite_funders_url = np.nan - prerequisite_funders_sherpa_id = np.nan - prerequisite_subjects = np.nan - named_repository = np.nan - named_academic_social_network = np.nan - locations_ir = '' - locations_not_ir = '' - copyright_owner = np.nan - conditions = np.nan - public_notes = np.nan - if ('additional_oa_fee' in poa): - additional_oa_fee = poa['additional_oa_fee'] - if ('location' in poa): - archiving = 0 - location = '' - mylocations = poa['location']['location'] - mylocations_text = poa['location']['location_phrases'] - if (type(mylocations) is not list): - mylocations = [mylocations] - location = ' ; '.join(mylocations) - for locationi in mylocations: - if 
locationi in repositories_archiving : - archiving = archiving + 1 - for locationi_text in mylocations_text: - if locationi_text['value'] == locationi : - if locations_ir == '': - locations_ir = locations_ir + locationi_text['phrase'] - else : - if locationi_text['phrase'] not in locations_ir : - locations_ir = locations_ir + ' ; ' + locationi_text['phrase'] - else : - for locationi_text in mylocations_text: - if locationi_text['value'] == locationi : - if locations_not_ir == '': - locations_not_ir = locations_not_ir + locationi_text['phrase'] - else : - if locationi_text['phrase'] not in locations_not_ir : - locations_not_ir = locations_not_ir + ' ; ' + locationi_text['phrase'] - # print (archiving) - if archiving > 0: - archiving = True - else : - archiving = False - if ('named_repository' in poa['location']): - if (type(poa['location']['named_repository']) is list): - named_repository = ' ; '.join(poa['location']['named_repository']) - else : - named_repository = poa['location']['named_repository'] - locations_not_ir = locations_not_ir.replace('Named Repository', named_repository) - locations_ir = locations_ir.replace('Named Repository', named_repository) - if ('named_academic_social_network' in poa['location']): - if (type(poa['location']['named_academic_social_network']) is list): - named_academic_social_network = ' ; '.join(poa['location']['named_academic_social_network']) - else : - named_academic_social_network = poa['location']['named_academic_social_network'] - locations_not_ir = locations_not_ir.replace('Named Academic Social Network', named_academic_social_network) - locations_ir = locations_ir.replace('Named Academic Social Network', named_academic_social_network) - if ('embargo' in poa): - # print(poa['embargo']) - embargo_amount = 0 - if ('amount' in poa['embargo']): - embargo_amount = poa['embargo']['amount'] - if ('units' in poa['embargo']): - if (poa['embargo']['units'] == 'months') : - embargo = embargo_amount - elif (poa['embargo']['units'] == 
'years') : - embargo = embargo_amount*12 - elif (poa['embargo']['units'] == 'weeks') : - embargo = int(embargo_amount/4) - if (embargo == 0): - embargo = 1 - elif (poa['embargo']['units'] == 'days') : - embargo = int(embargo_amount/30) - if (embargo == 0): - embargo = 1 - else : - embargo = embargo_amount - if ('prerequisites' in poa): - if 'prerequisites' in poa['prerequisites'] : - if (type(poa['prerequisites']['prerequisites']) is list): - prerequisites = ' ; '.join(poa['prerequisites']['prerequisites']) - else: - prerequisites = poa['prerequisites']['prerequisites'] - if ('prerequisite_funders' in poa['prerequisites']): - prerequisite_funders = True - # prerequisite_funders = poa['prerequisites']['prerequisite_funders'] - # if (type(poa['prerequisites']['prerequisite_funders']) is list): - # prerequisite_funders = ' ; '.join(poa['prerequisites']['prerequisite_funders']) - # else: - # prerequisite_funders = poa['prerequisites']['prerequisite_funders'] - if ('prerequisite_subjects' in poa['prerequisites']): - prerequisite_subjects = True - # prerequisite_subjects = poa['prerequisites']['prerequisite_subjects'] - # if (type(poa['prerequisite_subjects']) is list): - # prerequisite_subjects = ' ; '.join(poa['prerequisite_subjects']) - # else: - # prerequisite_subjects = poa['prerequisite_subjects'] - if ('copyright_owner' in poa): - copyright_owner = poa['copyright_owner'] - if ('publisher_deposit' in poa): - publisher_deposit = '' - if (type(poa['publisher_deposit']) is list): - for deposit in poa['publisher_deposit']: - if 'type' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + deposit['repository_metadata']['type'] - if 'name' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + ' (' + deposit['repository_metadata']['name'][0]['name'] + ')' - else : - if 'name' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + deposit['repository_metadata']['name'][0]['name'] - publisher_deposit = 
publisher_deposit + ' ; ' - else : - deposit = poa['publisher_deposit'] - if 'type' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + deposit['repository_metadata']['type'] - if 'name' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + ' (' + deposit['repository_metadata']['name'][0]['name'] + ')' - else : - if 'name' in deposit['repository_metadata']: - publisher_deposit = publisher_deposit + deposit['repository_metadata']['name'][0]['name'] - publisher_deposit = publisher_deposit + ' ; ' - # print (publisher_deposit) - if ('conditions' in poa): - if (type(poa['conditions']) is list): - conditions = ' ; '.join(poa['conditions']) - else: - conditions = poa['conditions'] - if ('public_notes' in poa): - if (type(poa['public_notes']) is list): - public_notes = ' ; '.join(poa['public_notes']) - else: - public_notes = poa['public_notes'] - if ('license' in poa): - licenses = poa['license'] - if (type(licenses) is not list): - licenses = [licenses] - else : - licenses = [''] - # avec article version - if ('article_version' in poa): - article_versions = poa['article_version'] - for article_version in article_versions: - for license in licenses: - if ('license' in license): - mylicense = license['license'] - else : - mylicense = '' - # avec prerequisites - if ('prerequisites' in poa) : - # avec prerequisites_funders - if ('prerequisite_funders' in poa['prerequisites']): - for prerequisite_fundersi in poa['prerequisites']['prerequisite_funders'] : - prerequisite_funders_name = prerequisite_fundersi['funder_metadata']['name'][0]['name'] - if 'acronym' in prerequisite_fundersi['funder_metadata']['name'][0]: - prerequisite_funders_name = prerequisite_funders_name + ' (' + prerequisite_fundersi['funder_metadata']['name'][0]['acronym'] + ')' - if 'identifiers' in prerequisite_fundersi['funder_metadata'] : - for fund_identifier in prerequisite_fundersi['funder_metadata']['identifiers'] : - if fund_identifier['type'] == 
'fundref': - prerequisite_funders_fundref = fund_identifier['identifier'] - if fund_identifier['type'] == 'ror': - prerequisite_funders_ror = fund_identifier['identifier'] - if 'country' in prerequisite_fundersi['funder_metadata']: - prerequisite_funders_country = prerequisite_fundersi['funder_metadata']['country'] - if 'url' in prerequisite_fundersi['funder_metadata']: - prerequisite_funders_url = prerequisite_fundersi['funder_metadata']['url'][0]['url'] - prerequisite_funders_sherpa_id = prerequisite_fundersi['funder_metadata']['id'] - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - # sans prerequisites_funders - else : - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : 
open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - # sans prerequisites - else : - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 
'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - - # sans article version - else : - if (type(licenses) is not list): - licenses = [licenses] - for license in licenses: - if ('license' in license): - mylicense = license['license'] - else : - mylicense = '' - # avec prerequisites - if ('prerequisites' in poa) : - # avec prerequisites_funders - if ('prerequisite_funders' in poa['prerequisites']): - for prerequisite_fundersi in poa['prerequisites']['prerequisite_funders'] : - prerequisite_funders_name = prerequisite_fundersi['funder_metadata']['name'][0]['name'] - if 'acronym' in prerequisite_fundersi['funder_metadata']['name'][0]: - prerequisite_funders_name = prerequisite_funders_name + ' (' + prerequisite_fundersi['funder_metadata']['name'][0]['acronym'] + ')' - if 'identifiers' in prerequisite_fundersi['funder_metadata'] : - for fund_identifier in prerequisite_fundersi['funder_metadata']['identifiers'] : - if fund_identifier['type'] == 'fundref': - prerequisite_funders_fundref = fund_identifier['identifier'] - if fund_identifier['type'] == 'ror': - prerequisite_funders_ror = fund_identifier['identifier'] - if 'country' in prerequisite_fundersi['funder_metadata']: - prerequisite_funders_country = prerequisite_fundersi['funder_metadata']['country'] - if 'url' in prerequisite_fundersi['funder_metadata']: - prerequisite_funders_url = prerequisite_fundersi['funder_metadata']['url'][0]['url'] - prerequisite_funders_sherpa_id = prerequisite_fundersi['funder_metadata']['id'] - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 
'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - # sans prerequisites_funders - else : - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : 
locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - # sans prerequisites - else : - sherpa_policies = sherpa_policies.append({'journal' : journal_id, - 'issn' : journal_issn, - 'sherpa_id' : sherpa_id, - 'sherpa_uri' : sherpa_uri, - 'open_access_prohibited' : open_access_prohibited, - 'additional_oa_fee' : additional_oa_fee, - 'article_version' : article_version, - 'license' : mylicense, - 'embargo' : embargo, - 'prerequisites' : prerequisites, - 'prerequisite_funders' : prerequisite_funders, - 'prerequisite_funders_name' : prerequisite_funders_name, - 'prerequisite_funders_fundref' : prerequisite_funders_fundref, - 'prerequisite_funders_ror' : prerequisite_funders_ror, - 'prerequisite_funders_country' : prerequisite_funders_country, - 'prerequisite_funders_url' : prerequisite_funders_url, - 'prerequisite_funders_sherpa_id' : prerequisite_funders_sherpa_id, - 'prerequisite_subjects' : prerequisite_subjects, - 'location' : location, - 'locations_ir' : locations_ir, - 'locations_not_ir' : locations_not_ir, - 'named_repository' : named_repository, - 'named_academic_social_network' : named_academic_social_network, - 'copyright_owner' : copyright_owner, - 'publisher_deposit' : publisher_deposit, - 'archiving' : archiving, - 'conditions' : conditions, - 'public_notes' : public_notes - }, ignore_index=True) - # sans permitted_oa - else : - print ('permitted_oa MISSING') - else : - print ('id MISSING') - - -# In[43]: - - -sherpa_policies - - -# In[44]: - - -# convertir l'index en id -sherpa_policies = sherpa_policies.reset_index() -# ajout de l'id avec l'index + 1 -sherpa_policies['id'] = sherpa_policies['index'] + 1 -del sherpa_policies['index'] -sherpa_policies - - -# In[45]: - - -# export csv 
-sherpa_policies.to_csv('sample/sherpa_policies_brut.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[46]: - - -# export excel -sherpa_policies.to_excel('sample/sherpa_policies_brut.xlsx', index=False) - - -# ## Calcul de la catégorie "green" et export final des journaux - -# In[47]: - - -sherpa_policies - - -# In[48]: - - -sherpa_policies_ir = sherpa_policies.loc[(sherpa_policies['archiving'] == True) & (sherpa_policies['article_version'] == 'published') & (sherpa_policies['prerequisite_funders'].isna())][['journal', 'embargo', 'license', 'conditions']] -sherpa_policies_ir - - -# In[49]: - - -# dedup -sherpa_policies_ir_id = sherpa_policies_ir[['journal', 'embargo']].sort_values(by=['journal', 'embargo']) -sherpa_policies_ir_dedup = sherpa_policies_ir_id.drop_duplicates(subset='journal') -sherpa_policies_ir_dedup - - -# In[50]: - - -# ajout de la ctégorie green (2) -sherpa_policies_ir_dedup['oa_status'] = 2 -sherpa_policies_ir_dedup - - -# In[51]: - - -# merge avec les revues -sherpa_policies_ir_dedup = sherpa_policies_ir_dedup.rename(columns={'journal' : 'id'}) -journals_export = pd.merge(journals_export, sherpa_policies_ir_dedup, on='id', how='left') -journals_export - - -# In[52]: - - -# choix de la catégorie OA -journals_export['oa_status'] = journals_export['oa_status_x'] -journals_export.loc[(journals_export['oa_status_x'] == 1) & (journals_export['oa_status_y'].notna()), 'oa_status'] = journals_export['oa_status_y'] -journals_export - - -# In[53]: - - -# 6 : Diamond -# 5 : Gold -# 4 : Full -# 3 : Hybrid -# 2 : Green -# 1 : UNKNOWN -journals_export['oa_status'].value_counts() - - -# In[54]: - - -del journals_export['embargo'] -del journals_export['oa_status_x'] -del journals_export['oa_status_y'] -journals_export - - -# In[55]: - - -journals_export['oa_status'] = journals_export['oa_status'].astype(int) -journals_export - - -# In[56]: - - -# export csv -journals_export.to_csv('sample/journal_fin_sherpa.tsv', sep='\t', encoding='utf-8', index=False) - 
- -# In[57]: - - -# export excel -journals_export.to_excel('sample/journal_fin_sherpa.xlsx', index=False) - - -# In[58]: - - -# export csv -sherpa_policies_ir_dedup.to_csv('sample/journal_ir.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[59]: - - -# export excel -sherpa_policies_ir_dedup.to_excel('sample/journal_ir.xlsx', index=False) - - -# In[ ]: - - - - diff --git a/import_scripts/07_oacct_sherpa_publishers.md b/import_scripts/07_oacct_sherpa_publishers.md deleted file mode 100644 index 2a7ef957..00000000 --- a/import_scripts/07_oacct_sherpa_publishers.md +++ /dev/null @@ -1,4401 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. - -Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -Date de dernière mise à jour : 16.07.2021 - -## Table Journals Publishers : ajout des informations de Sherpa - - -```python -import pandas as pd -import csv -import json -import numpy as np -``` - - -```python -publishers_issn = pd.read_csv('sample/publishers_brut.tsv', encoding='utf-8', header=0, sep='\t') -publishers_issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher_idnameid
0Revue_Médicale_SuisseRevue Médicale Suisse1
1American_Physical_SocietyAmerican Physical Society2
2Public_Library_of_SciencePublic Library of Science3
3The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...4
4Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...5
............
376Tipografia_La_CommercialeTipografia La Commerciale377
377Red.:_Prof._Dr._F._Cavalli,_Istituto_oncologic...Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378
378Excerpta_MedicaExcerpta Medica379
379Generative_Grammar_Group_of_the_Department_of_...Generative Grammar Group of the Department of ...380
380999999UNKNOWN999999
-

381 rows × 3 columns

-
- - - - -```python -# import ids -publisher_ids = pd.read_csv('sample/journals_publishers_ids.tsv', encoding='utf-8', header=0, sep='\t') -publisher_ids -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idpublisher
011
122
233
344
445
.........
94099747
94199875
9429998
9431000119
9441001217
-

945 rows × 2 columns

-
- - - - -```python -# renommage id -publisher_ids = publisher_ids.rename(columns = {'id': 'journal'}) -publisher_ids = publisher_ids.rename(columns = {'publisher': 'id'}) -``` - - -```python -# dédoublonage par publisher id -publisher_ids_dedup = publisher_ids.drop_duplicates(subset='id') -publisher_ids_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalid
011
122
233
344
445
.........
929987376
930987377
932989378
934991379
937994380
-

380 rows × 2 columns

-
- - - - -```python -# merge avec journals -publisher = pd.merge(publishers_issn, publisher_ids_dedup, on='id', how='left') -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher_idnameidjournal
0Revue_Médicale_SuisseRevue Médicale Suisse11.0
1American_Physical_SocietyAmerican Physical Society22.0
2Public_Library_of_SciencePublic Library of Science33.0
3The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...44.0
4Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...54.0
...............
376Tipografia_La_CommercialeTipografia La Commerciale377987.0
377Red.:_Prof._Dr._F._Cavalli,_Istituto_oncologic...Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378989.0
378Excerpta_MedicaExcerpta Medica379991.0
379Generative_Grammar_Group_of_the_Department_of_...Generative Grammar Group of the Department of ...380994.0
380999999UNKNOWN999999NaN
-

381 rows × 4 columns

-
- - - - -```python -# ajout des valeurs de sherpa -publisher_sherpa = pd.read_csv('sample/publisher_sherpa.tsv', encoding='utf-8', header=0, sep='\t') -publisher_sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalpublisher_idnamecountrytypeurl
053245John Wiley and Sonsgbformer_publisherhttp://www.wiley.com/
14984American Chemical Societyussociety_publisherhttp://pubs.acs.org/
2789126Acoustical Society of Americaussociety_publisherhttp://acousticalsociety.org/
31663291Springergbcommercial_publisherhttps://www.springernature.com/gp/products/jou...
48073291Springergbcommercial_publisherhttps://www.springernature.com/gp/products/jou...
.....................
80387010American Physical Societyussociety_publisherhttp://www.aps.org/
8044110American Physical Societyussociety_publisherhttp://www.aps.org/
8058010American Physical Societyussociety_publisherhttp://www.aps.org/
80653310American Physical Societyussociety_publisherhttp://www.aps.org/
80760810American Physical Societyussociety_publisherhttp://www.aps.org/
-

808 rows × 6 columns

-
- - - - -```python -# renommage ids -publisher_sherpa = publisher_sherpa.rename(columns = {'publisher_id': 'publisher_id_sherpa', 'url': 'website_sherpa', 'country': 'iso_code'}) -``` - - -```python -# merge avec ids journals -publisher = pd.merge(publisher, publisher_sherpa, on='journal', how='left') -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher_idname_xidjournalpublisher_id_sherpaname_yiso_codetypewebsite_sherpa
0Revue_Médicale_SuisseRevue Médicale Suisse11.0NaNNaNNaNNaNNaN
1American_Physical_SocietyAmerican Physical Society22.010.0American Physical Societyussociety_publisherhttp://www.aps.org/
2Public_Library_of_SciencePublic Library of Science33.0112.0Public Library of Scienceuscommercial_publisherhttp://www.plos.org/
3The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...44.0NaNNaNNaNNaNNaN
4Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...54.0NaNNaNNaNNaNNaN
..............................
376Tipografia_La_CommercialeTipografia La Commerciale377987.03291.0Springergbcommercial_publisherhttps://www.springernature.com/gp/products/jou...
377Red.:_Prof._Dr._F._Cavalli,_Istituto_oncologic...Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378989.0NaNNaNNaNNaNNaN
378Excerpta_MedicaExcerpta Medica379991.030.0Elsevieruscommercial_publisherhttp://www.elsevier.com/
379Generative_Grammar_Group_of_the_Department_of_...Generative Grammar Group of the Department of ...380994.0NaNNaNNaNNaNNaN
380999999UNKNOWN999999NaNNaNNaNNaNNaNNaN
-

381 rows × 9 columns

-
- - - - -```python -# renommage names -publisher = publisher.rename(columns = {'name_x': 'name_issn', 'name_y': 'name_sherpa'}) -``` - - -```python -# ajout des informations à partir des revues -publisher_journals = pd.read_csv('sample/journals_publishers_brut.tsv', encoding='utf-8', header=0, sep='\t', usecols=['id', 'url']) -publisher_journals -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idurl
01NaN
12http://prl.aps.org/
23http://www.plosone.org/
34NaN
45http://ojps.aip.org/prbo/
.........
906997NaN
907998http://www.jpedsurg.org
908999http://www.springerlink.com/content/100451
9091000NaN
9101001https://www.physiology.org/journal/jappl
-

911 rows × 2 columns

-
- - - - -```python -# renommage id -publisher_journals = publisher_journals.rename(columns = {'id': 'journal'}) -``` - - -```python -# merge avec ids journals -publisher = pd.merge(publisher, publisher_journals, on='journal', how='left') -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher_idname_issnidjournalpublisher_id_sherpaname_sherpaiso_codetypewebsite_sherpaurl
0Revue_Médicale_SuisseRevue Médicale Suisse11.0NaNNaNNaNNaNNaNNaN
1American_Physical_SocietyAmerican Physical Society22.010.0American Physical Societyussociety_publisherhttp://www.aps.org/http://prl.aps.org/
2Public_Library_of_SciencePublic Library of Science33.0112.0Public Library of Scienceuscommercial_publisherhttp://www.plos.org/http://www.plosone.org/
3The_Global_Studies_Institute_de_l’Université_d...The Global Studies Institute de l’Université d...44.0NaNNaNNaNNaNNaNNaN
4Universitat_de_València,_Departamento_de_Teorí...Universitat de València, Departamento de Teorí...54.0NaNNaNNaNNaNNaNNaN
.................................
376Tipografia_La_CommercialeTipografia La Commerciale377987.03291.0Springergbcommercial_publisherhttps://www.springernature.com/gp/products/jou...NaN
377Red.:_Prof._Dr._F._Cavalli,_Istituto_oncologic...Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378989.0NaNNaNNaNNaNNaNNaN
378Excerpta_MedicaExcerpta Medica379991.030.0Elsevieruscommercial_publisherhttp://www.elsevier.com/NaN
379Generative_Grammar_Group_of_the_Department_of_...Generative Grammar Group of the Department of ...380994.0NaNNaNNaNNaNNaNNaN
380999999UNKNOWN999999NaNNaNNaNNaNNaNNaNNaN
-

381 rows × 10 columns

-
- - - - -```python -# renommage names -del publisher['publisher_id'] -del publisher['publisher_id_sherpa'] -del publisher['type'] -publisher = publisher.rename(columns = {'url' : 'website_issn_journal'}) -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
name_issnidjournalname_sherpaiso_codewebsite_sherpawebsite_issn_journal
0Revue Médicale Suisse11.0NaNNaNNaNNaN
1American Physical Society22.0American Physical Societyushttp://www.aps.org/http://prl.aps.org/
2Public Library of Science33.0Public Library of Scienceushttp://www.plos.org/http://www.plosone.org/
3The Global Studies Institute de l’Université d...44.0NaNNaNNaNNaN
4Universitat de València, Departamento de Teorí...54.0NaNNaNNaNNaN
........................
376Tipografia La Commerciale377987.0Springergbhttps://www.springernature.com/gp/products/jou...NaN
377Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378989.0NaNNaNNaNNaN
378Excerpta Medica379991.0Elsevierushttp://www.elsevier.com/NaN
379Generative Grammar Group of the Department of ...380994.0NaNNaNNaNNaN
380UNKNOWN999999NaNNaNNaNNaNNaN
-

381 rows × 7 columns

-
- - - - -```python -# ajout des champs vides des vides et int -publisher['city'] = '' -publisher['state'] = '' -publisher['oa_policies'] = '' -publisher['starting_year'] = 0 -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
name_issnidjournalname_sherpaiso_codewebsite_sherpawebsite_issn_journalcitystateoa_policiesstarting_year
0Revue Médicale Suisse11.0NaNNaNNaNNaN0
1American Physical Society22.0American Physical Societyushttp://www.aps.org/http://prl.aps.org/0
2Public Library of Science33.0Public Library of Scienceushttp://www.plos.org/http://www.plosone.org/0
3The Global Studies Institute de l’Université d...44.0NaNNaNNaNNaN0
4Universitat de València, Departamento de Teorí...54.0NaNNaNNaNNaN0
....................................
376Tipografia La Commerciale377987.0Springergbhttps://www.springernature.com/gp/products/jou...NaN0
377Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378989.0NaNNaNNaNNaN0
378Excerpta Medica379991.0Elsevierushttp://www.elsevier.com/NaN0
379Generative Grammar Group of the Department of ...380994.0NaNNaNNaNNaN0
380UNKNOWN999999NaNNaNNaNNaNNaN0
-

381 rows × 11 columns

-
- - - - -```python -# iso_code en majuscules -publisher['iso_code'] = publisher['iso_code'].str.upper() -# ajout de la valeur pour unknown -publisher['iso_code'] = publisher['iso_code'].fillna('__') -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
name_issnidjournalname_sherpaiso_codewebsite_sherpawebsite_issn_journalcitystateoa_policiesstarting_year
0Revue Médicale Suisse11.0NaN__NaNNaN0
1American Physical Society22.0American Physical SocietyUShttp://www.aps.org/http://prl.aps.org/0
2Public Library of Science33.0Public Library of ScienceUShttp://www.plos.org/http://www.plosone.org/0
3The Global Studies Institute de l’Université d...44.0NaN__NaNNaN0
4Universitat de València, Departamento de Teorí...54.0NaN__NaNNaN0
....................................
376Tipografia La Commerciale377987.0SpringerGBhttps://www.springernature.com/gp/products/jou...NaN0
377Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378989.0NaN__NaNNaN0
378Excerpta Medica379991.0ElsevierUShttp://www.elsevier.com/NaN0
379Generative Grammar Group of the Department of ...380994.0NaN__NaNNaN0
380UNKNOWN999999NaNNaN__NaNNaN0
-

381 rows × 11 columns

-
- - - - -```python -# merge avec countries -country = pd.read_csv('sample/country.tsv', usecols=('iso_code', 'id'), encoding='utf-8', header=0, sep='\t') -country -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
iso_codeid
0AF1
1AL2
2DZ3
3AS4
4AD5
.........
246ZM247
247ZW248
248AX249
249OI250
250__999999
-

251 rows × 2 columns

-
- - - - -```python -country = country.rename(columns={'id': 'country'}) -country -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
iso_codecountry
0AF1
1AL2
2DZ3
3AS4
4AD5
.........
246ZM247
247ZW248
248AX249
249OI250
250__999999
-

251 rows × 2 columns

-
- - - - -```python -publisher = pd.merge(publisher, country, on='iso_code', how='left') -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
name_issnidjournalname_sherpaiso_codewebsite_sherpawebsite_issn_journalcitystateoa_policiesstarting_yearcountry
0Revue Médicale Suisse11.0NaN__NaNNaN0999999
1American Physical Society22.0American Physical SocietyUShttp://www.aps.org/http://prl.aps.org/0236
2Public Library of Science33.0Public Library of ScienceUShttp://www.plos.org/http://www.plosone.org/0236
3The Global Studies Institute de l’Université d...44.0NaN__NaNNaN0999999
4Universitat de València, Departamento de Teorí...54.0NaN__NaNNaN0999999
.......................................
376Tipografia La Commerciale377987.0SpringerGBhttps://www.springernature.com/gp/products/jou...NaN0234
377Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378989.0NaN__NaNNaN0999999
378Excerpta Medica379991.0ElsevierUShttp://www.elsevier.com/NaN0236
379Generative Grammar Group of the Department of ...380994.0NaN__NaNNaN0999999
380UNKNOWN999999NaNNaN__NaNNaN0999999
-

381 rows × 12 columns

-
- - - - -```python -# garder sherpa puis issn.org -publisher.loc[publisher['name_sherpa'].notna(), 'name'] = publisher['name_sherpa'] -publisher.loc[publisher['name_sherpa'].isna(), 'name'] = publisher['name_issn'] -publisher.loc[publisher['website_sherpa'].notna(), 'website'] = publisher['website_sherpa'] -publisher.loc[publisher['website_sherpa'].isna(), 'website'] = publisher['website_issn_journal'] -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
name_issnidjournalname_sherpaiso_codewebsite_sherpawebsite_issn_journalcitystateoa_policiesstarting_yearcountrynamewebsite
0Revue Médicale Suisse11.0NaN__NaNNaN0999999Revue Médicale SuisseNaN
1American Physical Society22.0American Physical SocietyUShttp://www.aps.org/http://prl.aps.org/0236American Physical Societyhttp://www.aps.org/
2Public Library of Science33.0Public Library of ScienceUShttp://www.plos.org/http://www.plosone.org/0236Public Library of Sciencehttp://www.plos.org/
3The Global Studies Institute de l’Université d...44.0NaN__NaNNaN0999999The Global Studies Institute de l’Université d...NaN
4Universitat de València, Departamento de Teorí...54.0NaN__NaNNaN0999999Universitat de València, Departamento de Teorí...NaN
.............................................
376Tipografia La Commerciale377987.0SpringerGBhttps://www.springernature.com/gp/products/jou...NaN0234Springerhttps://www.springernature.com/gp/products/jou...
377Red.: Prof. Dr. F. Cavalli, Istituto oncologic...378989.0NaN__NaNNaN0999999Red.: Prof. Dr. F. Cavalli, Istituto oncologic...NaN
378Excerpta Medica379991.0ElsevierUShttp://www.elsevier.com/NaN0236Elsevierhttp://www.elsevier.com/
379Generative Grammar Group of the Department of ...380994.0NaN__NaNNaN0999999Generative Grammar Group of the Department of ...NaN
380UNKNOWN999999NaNNaN__NaNNaN0999999UNKNOWNNaN
-

381 rows × 14 columns

-
- - - - -```python -# garder les champs utiles pour l'éditeur -publisher_export = publisher[['id', 'name', 'country', 'city', 'state', 'starting_year', 'website', 'oa_policies']] -``` - - -```python -# supprimer les doublons -publisher_export = publisher_export.drop_duplicates(subset='id') -publisher_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamecountrycitystatestarting_yearwebsiteoa_policies
01Revue Médicale Suisse9999990NaN
12American Physical Society2360http://www.aps.org/
23Public Library of Science2360http://www.plos.org/
34The Global Studies Institute de l’Université d...9999990NaN
45Universitat de València, Departamento de Teorí...9999990NaN
...........................
376377Springer2340https://www.springernature.com/gp/products/jou...
377378Red.: Prof. Dr. F. Cavalli, Istituto oncologic...9999990NaN
378379Elsevier2360http://www.elsevier.com/
379380Generative Grammar Group of the Department of ...9999990NaN
380999999UNKNOWN9999990NaN
-

381 rows × 8 columns

-
- - - - -```python -# remplacement des vides et id à int -publisher_export['website'] = publisher_export['website'].fillna('') -publisher_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamecountrycitystatestarting_yearwebsiteoa_policies
01Revue Médicale Suisse9999990
12American Physical Society2360http://www.aps.org/
23Public Library of Science2360http://www.plos.org/
34The Global Studies Institute de l’Université d...9999990
45Universitat de València, Departamento de Teorí...9999990
...........................
376377Springer2340https://www.springernature.com/gp/products/jou...
377378Red.: Prof. Dr. F. Cavalli, Istituto oncologic...9999990
378379Elsevier2360http://www.elsevier.com/
379380Generative Grammar Group of the Department of ...9999990
380999999UNKNOWN9999990
-

381 rows × 8 columns

-
- - - - -```python -# merge pour avoir les titres -publisher_ids_dedup = pd.merge(publisher_ids_dedup, publisher_export[['id', 'name']], on='id', how='left') -publisher_ids_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalidname
011Revue Médicale Suisse
122American Physical Society
233Public Library of Science
344The Global Studies Institute de l’Université d...
445Universitat de València, Departamento de Teorí...
............
375987376Springer
376987377Springer
377989378Red.: Prof. Dr. F. Cavalli, Istituto oncologic...
378991379Elsevier
379994380Generative Grammar Group of the Department of ...
-

380 rows × 3 columns

-
- - - - -```python -# garder les ids avant le dédoublonage pour la correction du publisher_ids_dedup -publisher_ids_dedup = publisher_ids_dedup.rename(columns = {'id': 'publisher_av_dedup'}) -publisher_ids_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalpublisher_av_dedupname
011Revue Médicale Suisse
122American Physical Society
233Public Library of Science
344The Global Studies Institute de l’Université d...
445Universitat de València, Departamento de Teorí...
............
375987376Springer
376987377Springer
377989378Red.: Prof. Dr. F. Cavalli, Istituto oncologic...
378991379Elsevier
379994380Generative Grammar Group of the Department of ...
-

380 rows × 3 columns

-
- - - - -```python -publisher_export_dedup = publisher_export.drop_duplicates(subset='name') -publisher_export_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamecountrycitystatestarting_yearwebsiteoa_policies
01Revue Médicale Suisse9999990
12American Physical Society2360http://www.aps.org/
23Public Library of Science2360http://www.plos.org/
34The Global Studies Institute de l’Université d...9999990
45Universitat de València, Departamento de Teorí...9999990
...........................
371372[American Medical Association]9999990http://archneur.jamanetwork.com/issues.aspx
374375Société botanique de Genève9999990
377378Red.: Prof. Dr. F. Cavalli, Istituto oncologic...9999990
379380Generative Grammar Group of the Department of ...9999990
380999999UNKNOWN9999990
-

196 rows × 8 columns

-
- - - - -```python -del publisher_export_dedup['id'] -# convertir l'index en id -publisher_export_dedup = publisher_export_dedup.reset_index() -# ajout de l'id avec l'index + 1 -publisher_export_dedup['id'] = publisher_export_dedup['index'] + 1 -del publisher_export_dedup['index'] -publisher_export_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namecountrycitystatestarting_yearwebsiteoa_policiesid
0Revue Médicale Suisse99999901
1American Physical Society2360http://www.aps.org/2
2Public Library of Science2360http://www.plos.org/3
3The Global Studies Institute de l’Université d...99999904
4Universitat de València, Departamento de Teorí...99999905
...........................
191[American Medical Association]9999990http://archneur.jamanetwork.com/issues.aspx372
192Société botanique de Genève9999990375
193Red.: Prof. Dr. F. Cavalli, Istituto oncologic...9999990378
194Generative Grammar Group of the Department of ...9999990380
195UNKNOWN9999990381
-

196 rows × 8 columns

-
- - - - -```python -del publisher_export_dedup['id'] -# convertir l'index en id -publisher_export_dedup = publisher_export_dedup.reset_index() -# ajout de l'id avec l'index + 1 -publisher_export_dedup['id'] = publisher_export_dedup['index'] + 1 -del publisher_export_dedup['index'] -publisher_export_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namecountrycitystatestarting_yearwebsiteoa_policiesid
0Revue Médicale Suisse99999901
1American Physical Society2360http://www.aps.org/2
2Public Library of Science2360http://www.plos.org/3
3The Global Studies Institute de l’Université d...99999904
4Universitat de València, Departamento de Teorí...99999905
...........................
191[American Medical Association]9999990http://archneur.jamanetwork.com/issues.aspx192
192Société botanique de Genève9999990193
193Red.: Prof. Dr. F. Cavalli, Istituto oncologic...9999990194
194Generative Grammar Group of the Department of ...9999990195
195UNKNOWN9999990196
-

196 rows × 8 columns

-
- - - - -```python -# merge avec les ids d'avant Sherpa -publisher_ids_dedup = pd.merge(publisher_ids_dedup, publisher_export_dedup[['id', 'name']], on='name', how='left') -publisher_ids_dedup = publisher_ids_dedup.rename(columns = {'id': 'publisher'}) -publisher_ids_dedup = publisher_ids_dedup.rename(columns = {'journal': 'id'}) -publisher_ids_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idpublisher_av_dedupnamepublisher
011Revue Médicale Suisse1
122American Physical Society2
233Public Library of Science3
344The Global Studies Institute de l’Université d...4
445Universitat de València, Departamento de Teorí...5
...............
375987376Springer45
376987377Springer45
377989378Red.: Prof. Dr. F. Cavalli, Istituto oncologic...194
378991379Elsevier11
379994380Generative Grammar Group of the Department of ...195
-

380 rows × 4 columns

-
- - - - -```python -# concat valeurs avec même id -del publisher_ids_dedup['publisher_av_dedup'] -del publisher_ids_dedup['name'] -publisher_ids_dedup['publisher'] = publisher_ids_dedup['publisher'].astype(str) -publisher_ids_dedup_grouped = publisher_ids_dedup.groupby('id').agg({'publisher': lambda x: ', '.join(x)}) -publisher_ids_dedup_grouped -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
publisher
id
11
22
33
44, 5
52
......
986193
98745, 45
989194
99111
994195
-

366 rows × 1 columns

-
- - - - -```python -# modifs dans les journaux -journal = pd.read_csv('sample/journal_fin_sherpa.tsv', encoding='utf-8', header=0, sep='\t') -journal -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguagepublisherdoaj_sealdoaj_statuslockssporticonlchqoam_av_scoreoa_status
01Revue médicale suisseRev. méd. suisse20059999NaN215138100000NaN1
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/236124200010NaN2
23PLoS ONENaN20069999http://www.plosone.org/2361243111004.0357145
34EU-topíasEU-topías20119999NaN209124, 138, 402, 2924, 500000NaN1
45Physical review B: Condensed matter and materi...Phys. rev., B, Condens. matter mater. phys.19982015http://journals.aps.org/prb/236124600010NaN2
...................................................
906997Smart Materials and StructuresSmart mater. struct. (Print)19929999http://iopscience.iop.org/0964-17262341244700010NaN2
907998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/2361247500010NaN2
908999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...83124800111NaN2
9091000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...23412411900010NaN2
9101001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl23612421700000NaN1
-

911 rows × 16 columns

-
- - - - -```python -# merge avec les journaux journal_fin_sherpa -journal = pd.merge(journal, publisher_ids_dedup_grouped, on='id', how='left') -journal -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguagepublisher_xdoaj_sealdoaj_statuslockssporticonlchqoam_av_scoreoa_statuspublisher_y
01Revue médicale suisseRev. méd. suisse20059999NaN215138100000NaN11
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/236124200010NaN22
23PLoS ONENaN20069999http://www.plosone.org/2361243111004.03571453
34EU-topíasEU-topías20119999NaN209124, 138, 402, 2924, 500000NaN14, 5
45Physical review B: Condensed matter and materi...Phys. rev., B, Condens. matter mater. phys.19982015http://journals.aps.org/prb/236124600010NaN22
......................................................
906997Smart Materials and StructuresSmart mater. struct. (Print)19929999http://iopscience.iop.org/0964-17262341244700010NaN2NaN
907998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/2361247500010NaN2NaN
908999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...83124800111NaN2NaN
9091000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...23412411900010NaN2NaN
9101001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl23612421700000NaN1NaN
-

911 rows × 17 columns

-
- - - - -```python -del journal['publisher_x'] -journal = journal.rename(columns = {'publisher_y': 'publisher'}) -journal -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnamename_short_iso_4starting_yearend_yearwebsitecountrylanguagedoaj_sealdoaj_statuslockssporticonlchqoam_av_scoreoa_statuspublisher
01Revue médicale suisseRev. méd. suisse20059999NaN21513800000NaN11
12Physical Review LettersPhys. rev. lett. (Print)19589999http://prl.aps.org/23612400010NaN22
23PLoS ONENaN20069999http://www.plosone.org/236124111004.03571453
34EU-topíasEU-topías20119999NaN209124, 138, 402, 29200000NaN14, 5
45Physical review B: Condensed matter and materi...Phys. rev., B, Condens. matter mater. phys.19982015http://journals.aps.org/prb/23612400010NaN22
...................................................
906997Smart Materials and StructuresSmart mater. struct. (Print)19929999http://iopscience.iop.org/0964-172623412400010NaN2NaN
907998Journal of Pediatric SurgeryJ. pediatr. surg. (Print)19669999http://www.jpedsurg.org/23612400010NaN2NaN
908999Probability Theory and Related FieldsProbab. theory relat. fields (Internet)uuuu9999http://www.springerlink.com/content/100451/?p=...8312400111NaN2NaN
9091000Renewable EnergyRenew. energy19919999http://www.elsevier.com/wps/product/cws_home/9...23412400010NaN2NaN
9101001Journal of applied physiology: respiratory, en...J. appl. physiol.: respir., environ. exercise ...19771984https://www.physiology.org/journal/jappl23612400000NaN1NaN
-

911 rows × 16 columns

-
- - - - -```python -# esport JSON publisher -result = journal.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/journal.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export csv -journal.to_csv('sample/journal.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -journal.to_excel('sample/journal.xlsx', index=False) -``` - - -```python -# esport JSON publisher -result = publisher_export_dedup.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/publisher.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export csv -publisher_export_dedup.to_csv('sample/publisher.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -publisher_export_dedup.to_excel('sample/publisher.xlsx', index=False) -``` - - -```python - -``` diff --git a/import_scripts/07_oacct_sherpa_publishers.py b/import_scripts/07_oacct_sherpa_publishers.py deleted file mode 100644 index 29af8f7e..00000000 --- a/import_scripts/07_oacct_sherpa_publishers.py +++ /dev/null @@ -1,348 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. 
-# -# Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -# Date de dernière mise à jour : 16.07.2021 - -# ## Table Journals Publishers : ajout des informations de Sherpa - -# In[1]: - - -import pandas as pd -import csv -import json -import numpy as np - - -# In[2]: - - -publishers_issn = pd.read_csv('sample/publishers_brut.tsv', encoding='utf-8', header=0, sep='\t') -publishers_issn - - -# In[3]: - - -# import ids -publisher_ids = pd.read_csv('sample/journals_publishers_ids.tsv', encoding='utf-8', header=0, sep='\t') -publisher_ids - - -# In[4]: - - -# renommage id -publisher_ids = publisher_ids.rename(columns = {'id': 'journal'}) -publisher_ids = publisher_ids.rename(columns = {'publisher': 'id'}) - - -# In[5]: - - -# dédoublonage par publisher id -publisher_ids_dedup = publisher_ids.drop_duplicates(subset='id') -publisher_ids_dedup - - -# In[6]: - - -# merge avec journals -publisher = pd.merge(publishers_issn, publisher_ids_dedup, on='id', how='left') -publisher - - -# In[7]: - - -# ajout des valeurs de sherpa -publisher_sherpa = pd.read_csv('sample/publisher_sherpa.tsv', encoding='utf-8', header=0, sep='\t') -publisher_sherpa - - -# In[8]: - - -# renommage ids -publisher_sherpa = publisher_sherpa.rename(columns = {'publisher_id': 'publisher_id_sherpa', 'url': 'website_sherpa', 'country': 'iso_code'}) - - -# In[9]: - - -# merge avec ids journals -publisher = pd.merge(publisher, publisher_sherpa, on='journal', how='left') -publisher - - -# In[10]: - - -# renommage names -publisher = publisher.rename(columns = {'name_x': 'name_issn', 'name_y': 'name_sherpa'}) - - -# In[11]: - - -# ajout des informations à partir des revues -publisher_journals = pd.read_csv('sample/journals_publishers_brut.tsv', encoding='utf-8', header=0, sep='\t', usecols=['id', 'url']) -publisher_journals - - -# In[12]: - - -# renommage id -publisher_journals = publisher_journals.rename(columns = {'id': 'journal'}) - - -# In[13]: - - -# merge avec ids journals -publisher = 
pd.merge(publisher, publisher_journals, on='journal', how='left') -publisher - - -# In[14]: - - -# renommage names -del publisher['publisher_id'] -del publisher['publisher_id_sherpa'] -del publisher['type'] -publisher = publisher.rename(columns = {'url' : 'website_issn_journal'}) -publisher - - -# In[15]: - - -# ajout des champs vides des vides et int -publisher['city'] = '' -publisher['state'] = '' -publisher['oa_policies'] = '' -publisher['starting_year'] = 0 -publisher - - -# In[16]: - - -# iso_code en majuscules -publisher['iso_code'] = publisher['iso_code'].str.upper() -# ajout de la valeur pour unknown -publisher['iso_code'] = publisher['iso_code'].fillna('__') -publisher - - -# In[17]: - - -# merge avec countries -country = pd.read_csv('sample/country.tsv', usecols=('iso_code', 'id'), encoding='utf-8', header=0, sep='\t') -country - - -# In[18]: - - -country = country.rename(columns={'id': 'country'}) -country - - -# In[19]: - - -publisher = pd.merge(publisher, country, on='iso_code', how='left') -publisher - - -# In[20]: - - -# garder sherpa puis issn.org -publisher.loc[publisher['name_sherpa'].notna(), 'name'] = publisher['name_sherpa'] -publisher.loc[publisher['name_sherpa'].isna(), 'name'] = publisher['name_issn'] -publisher.loc[publisher['website_sherpa'].notna(), 'website'] = publisher['website_sherpa'] -publisher.loc[publisher['website_sherpa'].isna(), 'website'] = publisher['website_issn_journal'] -publisher - - -# In[21]: - - -# garder les champs utiles pour l'éditeur -publisher_export = publisher[['id', 'name', 'country', 'city', 'state', 'starting_year', 'website', 'oa_policies']] - - -# In[22]: - - -# supprimer les doublons -publisher_export = publisher_export.drop_duplicates(subset='id') -publisher_export - - -# In[23]: - - -# remplacement des vides et id à int -publisher_export['website'] = publisher_export['website'].fillna('') -publisher_export - - -# In[24]: - - -# merge pour avoir les titres -publisher_ids_dedup = 
pd.merge(publisher_ids_dedup, publisher_export[['id', 'name']], on='id', how='left') -publisher_ids_dedup - - -# In[25]: - - -# garder les ids avant le dédoublonage pour la correction du publisher_ids_dedup -publisher_ids_dedup = publisher_ids_dedup.rename(columns = {'id': 'publisher_av_dedup'}) -publisher_ids_dedup - - -# In[26]: - - -publisher_export_dedup = publisher_export.drop_duplicates(subset='name') -publisher_export_dedup - - -# In[27]: - - -del publisher_export_dedup['id'] -# convertir l'index en id -publisher_export_dedup = publisher_export_dedup.reset_index() -# ajout de l'id avec l'index + 1 -publisher_export_dedup['id'] = publisher_export_dedup['index'] + 1 -del publisher_export_dedup['index'] -publisher_export_dedup - - -# In[28]: - - -del publisher_export_dedup['id'] -# convertir l'index en id -publisher_export_dedup = publisher_export_dedup.reset_index() -# ajout de l'id avec l'index + 1 -publisher_export_dedup['id'] = publisher_export_dedup['index'] + 1 -del publisher_export_dedup['index'] -publisher_export_dedup - - -# In[29]: - - -# merge avec les ids d'avant Sherpa -publisher_ids_dedup = pd.merge(publisher_ids_dedup, publisher_export_dedup[['id', 'name']], on='name', how='left') -publisher_ids_dedup = publisher_ids_dedup.rename(columns = {'id': 'publisher'}) -publisher_ids_dedup = publisher_ids_dedup.rename(columns = {'journal': 'id'}) -publisher_ids_dedup - - -# In[30]: - - -# concat valeurs avec même id -del publisher_ids_dedup['publisher_av_dedup'] -del publisher_ids_dedup['name'] -publisher_ids_dedup['publisher'] = publisher_ids_dedup['publisher'].astype(str) -publisher_ids_dedup_grouped = publisher_ids_dedup.groupby('id').agg({'publisher': lambda x: ', '.join(x)}) -publisher_ids_dedup_grouped - - -# In[31]: - - -# modifs dans les journaux -journal = pd.read_csv('sample/journal_fin_sherpa.tsv', encoding='utf-8', header=0, sep='\t') -journal - - -# In[32]: - - -# merge avec les journaux journal_fin_sherpa -journal = pd.merge(journal, 
publisher_ids_dedup_grouped, on='id', how='left') -journal - - -# In[33]: - - -del journal['publisher_x'] -journal = journal.rename(columns = {'publisher_y': 'publisher'}) -journal - - -# In[34]: - - -# esport JSON publisher -result = journal.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/journal.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[35]: - - -# export csv -journal.to_csv('sample/journal.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[36]: - - -# export excel -journal.to_excel('sample/journal.xlsx', index=False) - - -# In[37]: - - -# esport JSON publisher -result = publisher_export_dedup.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/publisher.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[38]: - - -# export csv -publisher_export_dedup.to_csv('sample/publisher.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[39]: - - -# export excel -publisher_export_dedup.to_excel('sample/publisher.xlsx', index=False) - - -# In[ ]: - - - - diff --git a/import_scripts/08_oacct_sherpa_issns.md b/import_scripts/08_oacct_sherpa_issns.md deleted file mode 100644 index 8989dea2..00000000 --- a/import_scripts/08_oacct_sherpa_issns.md +++ /dev/null @@ -1,2204 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. 
- -Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -Date de dernière mise à jour : 16.07.2021 - -## Table ISSNs - - -```python -import pandas as pd -import csv -import json -import numpy as np -``` - - -```python -issns = pd.read_csv('sample/issn_brut.tsv', encoding='utf-8', sep='\t') -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeid
00001-28150001-2815532PRINT11
11399-00390001-2815532NaN32
20001-48420001-4842498PRINT13
31520-48980001-4842498NaN34
40001-49660001-4966789PRINT15
.....................
17552470-00452470-0045533OTHER31756
17562470-00532470-0045533NaN31757
17572475-99532475-9953608ELECTRONIC21758
17582504-44272504-4427994PRINT11759
17592504-44352504-4427994NaN31760
-

1760 rows × 6 columns

-
- - - -## Ajout du format à partir de Sherpa - - -```python -# ajout du format par sherpa -issn_sherpa = pd.read_csv('sample/issn_sherpa.tsv', encoding='utf-8', sep='\t') -issn_sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
00001-28150001-2815532PRINT11print
11399-00390001-2815532NaN32electronic
20001-48420001-4842498PRINT13print
31520-48980001-4842498NaN34electronic
40001-49660001-4966789PRINT15print
........................
17552470-00452470-0045533OTHER31756print
17562470-00532470-0045533NaN31757electronic
17572475-99532475-9953608ELECTRONIC21758electronic
17582504-44272504-4427994PRINT11759NaN
17592504-44352504-4427994NaN31760NaN
-

1760 rows × 7 columns

-
- - - - -```python -issn_sherpa['type'] = issn_sherpa['type'].str.upper() -issn_sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
00001-28150001-2815532PRINT11PRINT
11399-00390001-2815532NaN32ELECTRONIC
20001-48420001-4842498PRINT13PRINT
31520-48980001-4842498NaN34ELECTRONIC
40001-49660001-4966789PRINT15PRINT
........................
17552470-00452470-0045533OTHER31756PRINT
17562470-00532470-0045533NaN31757ELECTRONIC
17572475-99532475-9953608ELECTRONIC21758ELECTRONIC
17582504-44272504-4427994PRINT11759NaN
17592504-44352504-4427994NaN31760NaN
-

1760 rows × 7 columns

-
- - - - -```python -issns = pd.merge(issns, issn_sherpa[['issn', 'type']], on='issn', how='outer') -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
00001-28150001-2815532PRINT11PRINT
11399-00390001-2815532NaN32ELECTRONIC
20001-48420001-4842498PRINT13PRINT
31520-48980001-4842498NaN34ELECTRONIC
40001-49660001-4966789PRINT15PRINT
........................
17552470-00452470-0045533OTHER31756PRINT
17562470-00532470-0045533NaN31757ELECTRONIC
17572475-99532475-9953608ELECTRONIC21758ELECTRONIC
17582504-44272504-4427994PRINT11759NaN
17592504-44352504-4427994NaN31760NaN
-

1760 rows × 7 columns

-
- - - - -```python -issns['format'].value_counts() -``` - - - - - PRINT 816 - ELECTRONIC 90 - OTHER 2 - Name: format, dtype: int64 - - - - -```python -issns['type'].value_counts() -``` - - - - - PRINT 750 - ELECTRONIC 575 - Name: type, dtype: int64 - - - - -```python -# tester les lignes sans type -issns.loc[issns['format'].isnull()].loc[issns['type'].isnull()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
51520-85240001-4966789NaN36NaN
61520-90240001-4966789NaN37NaN
171943-29840002-78638NaN318NaN
231555-71620002-9343985NaN324NaN
272163-57730002-9513787NaN328NaN
........................
17222160-90472160-9020467NaN31723NaN
17292340-115X2174-84544NaN31730NaN
17322211-32822211-2855990NaN31733NaN
17392297-70072297-6981618NaN31740NaN
17592504-44352504-4427994NaN31760NaN
-

326 rows × 7 columns

-
- - - - -```python -# tester les lignes avec type égal -issns.loc[issns['format'] == issns['type']] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
00001-28150001-2815532PRINT11PRINT
20001-48420001-4842498PRINT13PRINT
40001-49660001-4966789PRINT15PRINT
70001-62680001-6268166PRINT18PRINT
90001-63220001-6322807PRINT110PRINT
........................
17482380-81952380-8195947ELECTRONIC21749ELECTRONIC
17492469-990X2469-990X684ELECTRONIC21750ELECTRONIC
17512469-99502469-995041PRINT11752PRINT
17532470-00102470-001080PRINT11754PRINT
17572475-99532475-9953608ELECTRONIC21758ELECTRONIC
-

774 rows × 7 columns

-
- - - - -```python -# tester les lignes avec type diff -issns.loc[issns['format'] != issns['type']] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
11399-00390001-2815532NaN32ELECTRONIC
31520-48980001-4842498NaN34ELECTRONIC
51520-85240001-4966789NaN36NaN
61520-90240001-4966789NaN37NaN
80942-09400001-6268166NaN39ELECTRONIC
........................
17542470-00292470-001080NaN31755ELECTRONIC
17552470-00452470-0045533OTHER31756PRINT
17562470-00532470-0045533NaN31757ELECTRONIC
17582504-44272504-4427994PRINT11759NaN
17592504-44352504-4427994NaN31760NaN
-

986 rows × 7 columns

-
- - - - -```python -# attribution de l'id du type avec préference par ISSN.org puis Sherpa -# PRINT = 1 -# ELECTRONIC = 2 -# OTHER = 3 -issns['issn_type'] = issns['format'] -issns.loc[issns['format'].isna(), 'issn_type'] = issns['type'] -issns['issn_type'] = issns['issn_type'].str.replace('PRINT', '1') -issns['issn_type'] = issns['issn_type'].str.replace('ELECTRONIC', '2') -issns['issn_type'] = issns['issn_type'].str.replace('OTHER', '3') -issns['issn_type'] = issns['issn_type'].fillna(3) -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
00001-28150001-2815532PRINT11PRINT
11399-00390001-2815532NaN22ELECTRONIC
20001-48420001-4842498PRINT13PRINT
31520-48980001-4842498NaN24ELECTRONIC
40001-49660001-4966789PRINT15PRINT
........................
17552470-00452470-0045533OTHER31756PRINT
17562470-00532470-0045533NaN21757ELECTRONIC
17572475-99532475-9953608ELECTRONIC21758ELECTRONIC
17582504-44272504-4427994PRINT11759NaN
17592504-44352504-4427994NaN31760NaN
-

1760 rows × 7 columns

-
- - - - -```python -# test de diffs -issns.loc[issns['format'] == 'PRINT'].loc[issns['type'] == 'ELECTRONIC'] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
11230959-81380959-8138383PRINT11124ELECTRONIC
11911025-496X1025-496X779PRINT11192ELECTRONIC
14511465-69061465-6906773PRINT11452ELECTRONIC
-
- - - - -```python -# test de diffs -issns.loc[issns['format'] == 'ELECTRONIC'].loc[issns['type'] == 'PRINT'] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
1210009-73300009-7330948ELECTRONIC2122PRINT
3600024-37950024-3795968ELECTRONIC2361PRINT
5950163-38640163-3864701ELECTRONIC2596PRINT
6530194-911X0194-911X871ELECTRONIC2654PRINT
6650197-93370197-9337672ELECTRONIC2666PRINT
7110270-64740270-647473ELECTRONIC2712PRINT
7340278-23910278-2391521ELECTRONIC2735PRINT
9280743-74630743-7463114ELECTRONIC2929PRINT
12051040-46511040-4651886ELECTRONIC21206PRINT
12431059-77941059-7794440ELECTRONIC21244PRINT
12871079-56421079-5642468ELECTRONIC21288PRINT
15031528-35421528-3542547ELECTRONIC21504PRINT
15131530-69841530-698436ELECTRONIC21514PRINT
15151534-43201534-4320735ELECTRONIC21516PRINT
15381549-96181549-9618158ELECTRONIC21539PRINT
15461553-734X1553-734X240ELECTRONIC21547PRINT
16611876-61021876-6102249ELECTRONIC21662PRINT
16621877-05681877-0568675ELECTRONIC21663PRINT
16631877-70581877-7058632ELECTRONIC21664PRINT
17302211-12472211-1247113ELECTRONIC21731PRINT
-
- - - - -```python -# test de diffs -issns.loc[issns['format'].isna()].loc[issns['type'] == 'PRINT'] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
310003-26700003-2670415NaN132PRINT
1270010-36160010-3616417NaN1128PRINT
1510012-94020012-9402237NaN1152PRINT
2160018-93750018-9375361NaN1217PRINT
3760026-45980026-4598496NaN1377PRINT
6430178-80510178-8051999NaN1644PRINT
8381388-61500368-4466499NaN1839PRINT
11921560-79171025-496X779NaN11193PRINT
12011126-67081029-84797NaN11202PRINT
12491063-651X1063-651X588NaN11250PRINT
15311538-79331538-7836148NaN11532PRINT
15601569-92931569-9285822NaN11561PRINT
15971662-45481662-453X421NaN11598PRINT
16588756-32821873-2763488NaN11659PRINT
-
- - - - -```python -# convertir journal en int -issns['journal'] = issns['journal'].astype(int) -``` - - -```python -# convertir l'index en id -issns = issns.reset_index() -issns['id'] = issns['index'] + 1 -del issns['index'] -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnljournalformatissn_typeidtype
00001-28150001-2815532PRINT11PRINT
11399-00390001-2815532NaN22ELECTRONIC
20001-48420001-4842498PRINT13PRINT
31520-48980001-4842498NaN24ELECTRONIC
40001-49660001-4966789PRINT15PRINT
........................
17552470-00452470-0045533OTHER31756PRINT
17562470-00532470-0045533NaN21757ELECTRONIC
17572475-99532475-9953608ELECTRONIC21758ELECTRONIC
17582504-44272504-4427994PRINT11759NaN
17592504-44352504-4427994NaN31760NaN
-

1760 rows × 7 columns

-
- - - - -```python -issns['issn_type'] = issns['issn_type'].astype(int) -``` - - -```python -issns_export = issns[['id', 'issn', 'journal', 'issn_type']] -issns_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnjournalissn_type
010001-28155321
121399-00395322
230001-48424981
341520-48984982
450001-49667891
...............
175517562470-00455333
175617572470-00535332
175717582475-99536082
175817592504-44279941
175917602504-44359943
-

1760 rows × 4 columns

-
- - - - -```python -# supprimer les doublons par ISSN -issns_export = issns_export.drop_duplicates(subset='issn') -issns_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnjournalissn_type
010001-28155321
121399-00395322
230001-48424981
341520-48984982
450001-49667891
...............
175517562470-00455333
175617572470-00535332
175717582475-99536082
175817592504-44279941
175917602504-44359943
-

1760 rows × 4 columns

-
- - - - -```python -# esport JSON -result = issns_export.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/issn.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export csv -issns_export.to_csv('sample/issn.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -issns_export.to_excel('sample/issn.xlsx', index=False) -``` diff --git a/import_scripts/08_oacct_sherpa_issns.py b/import_scripts/08_oacct_sherpa_issns.py deleted file mode 100644 index b48bac00..00000000 --- a/import_scripts/08_oacct_sherpa_issns.py +++ /dev/null @@ -1,185 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet d'extraire les données choisis parmis les sources obtenues par API et les traiter pour les rendre exploitables dans l'application OACCT. 
-# -# Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -# Date de dernière mise à jour : 16.07.2021 - -# ## Table ISSNs - -# In[1]: - - -import pandas as pd -import csv -import json -import numpy as np - - -# In[2]: - - -issns = pd.read_csv('sample/issn_brut.tsv', encoding='utf-8', sep='\t') -issns - - -# ## Ajout du format à partir de Sherpa - -# In[3]: - - -# ajout du format par sherpa -issn_sherpa = pd.read_csv('sample/issn_sherpa.tsv', encoding='utf-8', sep='\t') -issn_sherpa - - -# In[4]: - - -issn_sherpa['type'] = issn_sherpa['type'].str.upper() -issn_sherpa - - -# In[5]: - - -issns = pd.merge(issns, issn_sherpa[['issn', 'type']], on='issn', how='outer') -issns - - -# In[6]: - - -issns['format'].value_counts() - - -# In[7]: - - -issns['type'].value_counts() - - -# In[8]: - - -# tester les lignes sans type -issns.loc[issns['format'].isnull()].loc[issns['type'].isnull()] - - -# In[9]: - - -# tester les lignes avec type égal -issns.loc[issns['format'] == issns['type']] - - -# In[10]: - - -# tester les lignes avec type diff -issns.loc[issns['format'] != issns['type']] - - -# In[11]: - - -# attribution de l'id du type avec préference par ISSN.org puis Sherpa -# PRINT = 1 -# ELECTRONIC = 2 -# OTHER = 3 -issns['issn_type'] = issns['format'] -issns.loc[issns['format'].isna(), 'issn_type'] = issns['type'] -issns['issn_type'] = issns['issn_type'].str.replace('PRINT', '1') -issns['issn_type'] = issns['issn_type'].str.replace('ELECTRONIC', '2') -issns['issn_type'] = issns['issn_type'].str.replace('OTHER', '3') -issns['issn_type'] = issns['issn_type'].fillna(3) -issns - - -# In[12]: - - -# test de diffs -issns.loc[issns['format'] == 'PRINT'].loc[issns['type'] == 'ELECTRONIC'] - - -# In[13]: - - -# test de diffs -issns.loc[issns['format'] == 'ELECTRONIC'].loc[issns['type'] == 'PRINT'] - - -# In[14]: - - -# test de diffs -issns.loc[issns['format'].isna()].loc[issns['type'] == 'PRINT'] - - -# In[15]: - - -# convertir journal en int -issns['journal'] 
= issns['journal'].astype(int) - - -# In[16]: - - -# convertir l'index en id -issns = issns.reset_index() -issns['id'] = issns['index'] + 1 -del issns['index'] -issns - - -# In[17]: - - -issns['issn_type'] = issns['issn_type'].astype(int) - - -# In[18]: - - -issns_export = issns[['id', 'issn', 'journal', 'issn_type']] -issns_export - - -# In[19]: - - -# supprimer les doublons par ISSN -issns_export = issns_export.drop_duplicates(subset='issn') -issns_export - - -# In[20]: - - -# esport JSON -result = issns_export.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/issn.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[21]: - - -# export csv -issns_export.to_csv('sample/issn.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[22]: - - -# export excel -issns_export.to_excel('sample/issn.xlsx', index=False) - diff --git a/import_scripts/09_oacct_read_and_publish.md b/import_scripts/09_oacct_read_and_publish.md deleted file mode 100644 index df115477..00000000 --- a/import_scripts/09_oacct_read_and_publish.md +++ /dev/null @@ -1,9540 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet de modifier les données extraites des differentes sources et les exporter dans les tables de l'application OACCT. 
- -Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -Date de dernière mise à jour : 08.09.2021 - - -```python -import pandas as pd -import csv -import json -import numpy as np -import os -# afficher toutes les colonnes -pd.set_option('display.max_columns', None) -# definir le debut des ids -id_start = 1 -``` - -## Ajout des rabais pour les revues des licences Read & Publish - -Journals list by publisher : - * https://consortium.ch/elsevier_titlelist_publication - * https://consortium.ch/springer_titlelist_publication - * https://consortium.ch/wiley_titlelist_publish - * https://consortium.ch/tandf_titlelist_publish - * https://consortium.ch/sage_titlelist_publish - * https://consortium.ch/cup_titlelist_publish - -Licence term : - * Elsevier : 2020-2023 - * Springer Nature : 2020-2022 - * Wiley : 2021-2024 - * Taylor & Francis : 2021-2023 - * Cambridge University Press (CUP) : 2021-2023 - -CC licences : - * Elsevier : CC-BY, CC-BY-NC-ND - * Springer Nature : CC-BY, CC-BY-NC - * Wiley : CC-BY, CC-BY-NC, CC-BY-NC-ND - * Taylor & Francis : CC-BY - * Cambridge University Press (CUP) : CC-BY, CC-BY-NC, CC-BY-NC-ND, CC-BY-NC-SA - -Special conditions : - * Cambridge University Press (CUP) : Only the following article types are covered: Research Articles, Review Articles, Rapid Communication, Brief Reports and Case Reports - - - -## Import du fichier des issns - - -```python -issn = pd.read_csv('sample/issn.tsv', encoding='utf-8', header=0, sep='\t') -issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnjournalissn_type
010001-28155321
121399-00395322
230001-48424981
341520-48984982
450001-49667891
...............
175517562470-00455333
175617572470-00535332
175717582475-99536082
175817592504-44279941
175917602504-44359943
-

1760 rows × 4 columns

-
- - - - -```python -# open publishers -publisher = pd.read_csv('sample/publisher.tsv', encoding='utf-8', header=0, sep='\t') -publisher -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namecountrycitystatestarting_yearwebsiteoa_policiesid
0Revue Médicale Suisse999999NaNNaN0NaNNaN1
1American Physical Society236NaNNaN0http://www.aps.org/NaN2
2Public Library of Science236NaNNaN0http://www.plos.org/NaN3
3The Global Studies Institute de l’Université d...999999NaNNaN0NaNNaN4
4Universitat de València, Departamento de Teorí...999999NaNNaN0NaNNaN5
...........................
191[American Medical Association]999999NaNNaN0http://archneur.jamanetwork.com/issues.aspxNaN192
192Société botanique de Genève999999NaNNaN0NaNNaN193
193Red.: Prof. Dr. F. Cavalli, Istituto oncologic...999999NaNNaN0NaNNaN194
194Generative Grammar Group of the Department of ...999999NaNNaN0NaNNaN195
195UNKNOWN999999NaNNaN0NaNNaN196
-

196 rows × 8 columns

-
- - - - -```python -publisher.loc[publisher['name'] == 'Elsevier'] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namecountrycitystatestarting_yearwebsiteoa_policiesid
10Elsevier236NaNNaN0http://www.elsevier.com/NaN11
-
- - - - -```python -publisher.loc[(publisher['name'] == 'Springer Verlag') | (publisher['name'] == 'Nature Research')] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namecountrycitystatestarting_yearwebsiteoa_policiesid
8Nature Research234NaNNaN0http://www.nature.com/NaN9
28Springer Verlag83NaNNaN0http://www.springerlink.com/?MUD=MPNaN29
-
- - - - -```python -publisher.loc[publisher['name'] == 'Wiley'] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namecountrycitystatestarting_yearwebsiteoa_policiesid
11Wiley236NaNNaN0https://www.wiley.com/en-gbNaN12
-
- - - - -```python -publisher.loc[publisher['name'] == 'Taylor and Francis'] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namecountrycitystatestarting_yearwebsiteoa_policiesid
23Taylor and Francis234NaNNaN0http://www.tandf.co.uk/journals/default.aspNaN24
-
- - - - -```python -publisher.loc[publisher['name'] == 'Cambridge University Press'] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namecountrycitystatestarting_yearwebsiteoa_policiesid
60Cambridge University Press234NaNNaN0http://www.cambridge.org/uk/NaN61
-
- - - - -```python -# ouvrir la liste d'organisations -participants = pd.read_csv('agreements/consortium_institutions_participation_read_and_publish.csv', encoding='utf-8', header=0, sep='\t') -participants -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
InstitutionElsevierSpringer NatureWileyROR
0Agroscopexxxhttps://ror.org/04d8ztx87
1Berner Fachhochschule BFHxxxhttps://ror.org/02bnkt322
2CERNNaNxxhttps://ror.org/01ggx4157
3Eidgenössisches Hochschulinstitut für Berufsbi...xxxhttps://ror.org/00zg4za48
4EPF Lausannexxxhttps://ror.org/02s376052
5ETH Zürichxxxhttps://ror.org/05a28rw58
6Fachhochschule Graubünden FHGRxxxhttps://ror.org/032ymzc07
7Fachhochschule Nordwestschweiz FHNWxxxhttps://ror.org/04mq2g308
8Forschungsinstitut für biologischen Landbau FibLxxxhttps://ror.org/0210tb741
9Graduate Institute (IHEID) – since 2021xxxhttps://ror.org/007ygn379
10Haute école spécialisée de Suisse occidentale ...xxxhttps://ror.org/01xkakk17
11HEP Berne, Jura, Neuchâtel (HEP-BEJUNE)xxxhttps://ror.org/015pmkr43
12HEP Fribourg (PHFR)xxxhttps://ror.org/048gre751
13HEP Vaudxxxhttps://ror.org/01bvm0h13
14Hochschule für Wirtschaft Zürich HWZxxxhttps://ror.org/02ejkey04
15Hochschule Luzern HSLUxxxhttps://ror.org/04nd0xd48
16Interkantonale Hochschule für Heilpädagogik (HfH)xxxhttps://ror.org/00w9q2c06
17Kalaidosxxxhttps://ror.org/049c2kr37
18Lib4RIxxxhttps://ror.org/021f7p178
19MediNaNxNaNNaN
20MMV - Medicine for Malaria Venturesxxxhttps://ror.org/00p9jf779
21Ostschweizer Fachhochschulen OSTxxxhttps://ror.org/038mj2660
22Pädagogische Hochschule Zürich PHZHxxxhttps://ror.org/01awgk221
23PH Bernxxxhttps://ror.org/05jf1ma54
24PH Graubünden (PHGR)xxxhttps://ror.org/02fjgft97
25PH Luzernxxxhttps://ror.org/0235ynq74
26PH Schaffhausen (PHSH)xxxhttps://ror.org/03fs41j10
27PH Schwyzxxxhttps://ror.org/00rqdn375
28PH St. Gallen (PHSG)xxxhttps://ror.org/05m37v666
29PH Thurgau (PHTG)xxxhttps://ror.org/04bf6dq94
30PH Wallis / HEP Valaisxxxhttps://ror.org/040gs8e06
31PH Zugxxxhttps://ror.org/05ghhx264
32Schweizerische Vogelwartexxxhttps://ror.org/03mcsbr76
33Scuola universitaria professionale della Svizz...xxxhttps://ror.org/05ep8g269
34Università della Svizzera italiana USIxxxhttps://ror.org/03c4atk17
35Universität Baselxxxhttps://ror.org/02s6k3f65
36Universität Bernxxxhttps://ror.org/02k7v4d05
37Universität Liechtensteinxxxhttps://ror.org/01qjrx392
38Universität Luzernxxxhttps://ror.org/00kgrkn83
39Universität St. Gallenxxxhttps://ror.org/0561a3s31
40Universität Zürichxxxhttps://ror.org/02crff812
41Université de Fribourgxxxhttps://ror.org/022fs9h90
42Université de Genèvexxxhttps://ror.org/01swzsf04
43Université de Lausannexxxhttps://ror.org/019whta54
44Université de Neuchâtelxxxhttps://ror.org/00vasag41
45Zürcher Hochschule der Künste ZHdKxxxhttps://ror.org/05r0ap620
46Zürcher Hochschule für Angewandte Wissenschaft...xxxhttps://ror.org/05pmsvm27
-
- - - - -```python -# suppression de Lib4RI qui est une bibliothèque -participants = participants.loc[participants['Institution'] != 'Lib4RI'] -participants -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
InstitutionElsevierSpringer NatureWileyROR
0Agroscopexxxhttps://ror.org/04d8ztx87
1Berner Fachhochschule BFHxxxhttps://ror.org/02bnkt322
2CERNNaNxxhttps://ror.org/01ggx4157
3Eidgenössisches Hochschulinstitut für Berufsbi...xxxhttps://ror.org/00zg4za48
4EPF Lausannexxxhttps://ror.org/02s376052
5ETH Zürichxxxhttps://ror.org/05a28rw58
6Fachhochschule Graubünden FHGRxxxhttps://ror.org/032ymzc07
7Fachhochschule Nordwestschweiz FHNWxxxhttps://ror.org/04mq2g308
8Forschungsinstitut für biologischen Landbau FibLxxxhttps://ror.org/0210tb741
9Graduate Institute (IHEID) – since 2021xxxhttps://ror.org/007ygn379
10Haute école spécialisée de Suisse occidentale ...xxxhttps://ror.org/01xkakk17
11HEP Berne, Jura, Neuchâtel (HEP-BEJUNE)xxxhttps://ror.org/015pmkr43
12HEP Fribourg (PHFR)xxxhttps://ror.org/048gre751
13HEP Vaudxxxhttps://ror.org/01bvm0h13
14Hochschule für Wirtschaft Zürich HWZxxxhttps://ror.org/02ejkey04
15Hochschule Luzern HSLUxxxhttps://ror.org/04nd0xd48
16Interkantonale Hochschule für Heilpädagogik (HfH)xxxhttps://ror.org/00w9q2c06
17Kalaidosxxxhttps://ror.org/049c2kr37
19MediNaNxNaNNaN
20MMV - Medicine for Malaria Venturesxxxhttps://ror.org/00p9jf779
21Ostschweizer Fachhochschulen OSTxxxhttps://ror.org/038mj2660
22Pädagogische Hochschule Zürich PHZHxxxhttps://ror.org/01awgk221
23PH Bernxxxhttps://ror.org/05jf1ma54
24PH Graubünden (PHGR)xxxhttps://ror.org/02fjgft97
25PH Luzernxxxhttps://ror.org/0235ynq74
26PH Schaffhausen (PHSH)xxxhttps://ror.org/03fs41j10
27PH Schwyzxxxhttps://ror.org/00rqdn375
28PH St. Gallen (PHSG)xxxhttps://ror.org/05m37v666
29PH Thurgau (PHTG)xxxhttps://ror.org/04bf6dq94
30PH Wallis / HEP Valaisxxxhttps://ror.org/040gs8e06
31PH Zugxxxhttps://ror.org/05ghhx264
32Schweizerische Vogelwartexxxhttps://ror.org/03mcsbr76
33Scuola universitaria professionale della Svizz...xxxhttps://ror.org/05ep8g269
34Università della Svizzera italiana USIxxxhttps://ror.org/03c4atk17
35Universität Baselxxxhttps://ror.org/02s6k3f65
36Universität Bernxxxhttps://ror.org/02k7v4d05
37Universität Liechtensteinxxxhttps://ror.org/01qjrx392
38Universität Luzernxxxhttps://ror.org/00kgrkn83
39Universität St. Gallenxxxhttps://ror.org/0561a3s31
40Universität Zürichxxxhttps://ror.org/02crff812
41Université de Fribourgxxxhttps://ror.org/022fs9h90
42Université de Genèvexxxhttps://ror.org/01swzsf04
43Université de Lausannexxxhttps://ror.org/019whta54
44Université de Neuchâtelxxxhttps://ror.org/00vasag41
45Zürcher Hochschule der Künste ZHdKxxxhttps://ror.org/05r0ap620
46Zürcher Hochschule für Angewandte Wissenschaft...xxxhttps://ror.org/05pmsvm27
-
- - - - -```python -# ajout de TF et CUP pour tous (TODO : obtenir la liste des bibliothèques pour ces deux licences) -participants['TF'] = 'x' -participants['CUP'] = 'x' -participants -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:3: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - This is separate from the ipykernel package so we can avoid doing imports until - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
InstitutionElsevierSpringer NatureWileyRORTFCUP
0Agroscopexxxhttps://ror.org/04d8ztx87xx
1Berner Fachhochschule BFHxxxhttps://ror.org/02bnkt322xx
2CERNNaNxxhttps://ror.org/01ggx4157xx
3Eidgenössisches Hochschulinstitut für Berufsbi...xxxhttps://ror.org/00zg4za48xx
4EPF Lausannexxxhttps://ror.org/02s376052xx
5ETH Zürichxxxhttps://ror.org/05a28rw58xx
6Fachhochschule Graubünden FHGRxxxhttps://ror.org/032ymzc07xx
7Fachhochschule Nordwestschweiz FHNWxxxhttps://ror.org/04mq2g308xx
8Forschungsinstitut für biologischen Landbau FibLxxxhttps://ror.org/0210tb741xx
9Graduate Institute (IHEID) – since 2021xxxhttps://ror.org/007ygn379xx
10Haute école spécialisée de Suisse occidentale ...xxxhttps://ror.org/01xkakk17xx
11HEP Berne, Jura, Neuchâtel (HEP-BEJUNE)xxxhttps://ror.org/015pmkr43xx
12HEP Fribourg (PHFR)xxxhttps://ror.org/048gre751xx
13HEP Vaudxxxhttps://ror.org/01bvm0h13xx
14Hochschule für Wirtschaft Zürich HWZxxxhttps://ror.org/02ejkey04xx
15Hochschule Luzern HSLUxxxhttps://ror.org/04nd0xd48xx
16Interkantonale Hochschule für Heilpädagogik (HfH)xxxhttps://ror.org/00w9q2c06xx
17Kalaidosxxxhttps://ror.org/049c2kr37xx
19MediNaNxNaNNaNxx
20MMV - Medicine for Malaria Venturesxxxhttps://ror.org/00p9jf779xx
21Ostschweizer Fachhochschulen OSTxxxhttps://ror.org/038mj2660xx
22Pädagogische Hochschule Zürich PHZHxxxhttps://ror.org/01awgk221xx
23PH Bernxxxhttps://ror.org/05jf1ma54xx
24PH Graubünden (PHGR)xxxhttps://ror.org/02fjgft97xx
25PH Luzernxxxhttps://ror.org/0235ynq74xx
26PH Schaffhausen (PHSH)xxxhttps://ror.org/03fs41j10xx
27PH Schwyzxxxhttps://ror.org/00rqdn375xx
28PH St. Gallen (PHSG)xxxhttps://ror.org/05m37v666xx
29PH Thurgau (PHTG)xxxhttps://ror.org/04bf6dq94xx
30PH Wallis / HEP Valaisxxxhttps://ror.org/040gs8e06xx
31PH Zugxxxhttps://ror.org/05ghhx264xx
32Schweizerische Vogelwartexxxhttps://ror.org/03mcsbr76xx
33Scuola universitaria professionale della Svizz...xxxhttps://ror.org/05ep8g269xx
34Università della Svizzera italiana USIxxxhttps://ror.org/03c4atk17xx
35Universität Baselxxxhttps://ror.org/02s6k3f65xx
36Universität Bernxxxhttps://ror.org/02k7v4d05xx
37Universität Liechtensteinxxxhttps://ror.org/01qjrx392xx
38Universität Luzernxxxhttps://ror.org/00kgrkn83xx
39Universität St. Gallenxxxhttps://ror.org/0561a3s31xx
40Universität Zürichxxxhttps://ror.org/02crff812xx
41Université de Fribourgxxxhttps://ror.org/022fs9h90xx
42Université de Genèvexxxhttps://ror.org/01swzsf04xx
43Université de Lausannexxxhttps://ror.org/019whta54xx
44Université de Neuchâtelxxxhttps://ror.org/00vasag41xx
45Zürcher Hochschule der Künste ZHdKxxxhttps://ror.org/05r0ap620xx
46Zürcher Hochschule für Angewandte Wissenschaft...xxxhttps://ror.org/05pmsvm27xx
-
- - - - -```python -# ouvrir la liste des journaux Elsevier -elsevier = pd.read_excel('agreements/Elsevier_titlelist_publication.xlsx', skiprows=7) -elsevier -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TitleISSN
0Academic Pediatrics1876-2859
1Accident Analysis and Prevention0001-4575
2Accounting, Organizations and Society0361-3682
3Acta Astronautica0094-5765
4Acta Biomaterialia1742-7061
.........
2240Wound Medicine2213-9095
2241Zeitschrift fuer Evidenz, Fortbildung und Qual...1865-9217
2242Zeitschrift fuer Medizinische Physik0939-3889
2243Zoologischer Anzeiger0044-5231
2244Zoology0944-2006
-

2245 rows × 2 columns

-
- - - - -```python -# ajout du champ version -elsevier['article_version'] = 'published' -elsevier -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TitleISSNarticle_version
0Academic Pediatrics1876-2859published
1Accident Analysis and Prevention0001-4575published
2Accounting, Organizations and Society0361-3682published
3Acta Astronautica0094-5765published
4Acta Biomaterialia1742-7061published
............
2240Wound Medicine2213-9095published
2241Zeitschrift fuer Evidenz, Fortbildung und Qual...1865-9217published
2242Zeitschrift fuer Medizinische Physik0939-3889published
2243Zoologischer Anzeiger0044-5231published
2244Zoology0944-2006published
-

2245 rows × 3 columns

-
- - - - -```python -# ajout des dates -elsevier['valid_from'] = '2020-01-01' -elsevier['valid_until'] = '2023-12-31' -elsevier -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TitleISSNarticle_versionvalid_fromvalid_until
0Academic Pediatrics1876-2859published2020-01-012023-12-31
1Accident Analysis and Prevention0001-4575published2020-01-012023-12-31
2Accounting, Organizations and Society0361-3682published2020-01-012023-12-31
3Acta Astronautica0094-5765published2020-01-012023-12-31
4Acta Biomaterialia1742-7061published2020-01-012023-12-31
..................
2240Wound Medicine2213-9095published2020-01-012023-12-31
2241Zeitschrift fuer Evidenz, Fortbildung und Qual...1865-9217published2020-01-012023-12-31
2242Zeitschrift fuer Medizinische Physik0939-3889published2020-01-012023-12-31
2243Zoologischer Anzeiger0044-5231published2020-01-012023-12-31
2244Zoology0944-2006published2020-01-012023-12-31
-

2245 rows × 5 columns

-
- - - - -```python -# ajout du embargo et archiving -elsevier['embargo_months'] = 0 -elsevier['archiving'] = True -elsevier -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TitleISSNarticle_versionvalid_fromvalid_untilembargo_monthsarchiving
0Academic Pediatrics1876-2859published2020-01-012023-12-310True
1Accident Analysis and Prevention0001-4575published2020-01-012023-12-310True
2Accounting, Organizations and Society0361-3682published2020-01-012023-12-310True
3Acta Astronautica0094-5765published2020-01-012023-12-310True
4Acta Biomaterialia1742-7061published2020-01-012023-12-310True
........................
2240Wound Medicine2213-9095published2020-01-012023-12-310True
2241Zeitschrift fuer Evidenz, Fortbildung und Qual...1865-9217published2020-01-012023-12-310True
2242Zeitschrift fuer Medizinische Physik0939-3889published2020-01-012023-12-310True
2243Zoologischer Anzeiger0044-5231published2020-01-012023-12-310True
2244Zoology0944-2006published2020-01-012023-12-310True
-

2245 rows × 7 columns

-
- - - - -```python -elsevier.iloc[elsevier.shape[0]-1] -``` - - - - - Title Zoology - ISSN 0944-2006 - article_version published - valid_from 2020-01-01 - valid_until 2023-12-31 - embargo_months 0 - archiving True - Name: 2244, dtype: object - - - - -```python -# ajout du champ license -# cc_by, cc_by_nc_nd -rp = pd.DataFrame() -elsevier['article_version'] = 'published' -elsevier['license'] = 'cc_by' -elsevier['Elsevier'] = 'x' -rp = rp.append(elsevier, ignore_index=True) -elsevier['license'] = 'cc_by_nc_nd' -rp = rp.append(elsevier, ignore_index=True) -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TitleISSNarticle_versionvalid_fromvalid_untilembargo_monthsarchivinglicenseElsevier
0Academic Pediatrics1876-2859published2020-01-012023-12-310Truecc_byx
1Accident Analysis and Prevention0001-4575published2020-01-012023-12-310Truecc_byx
2Accounting, Organizations and Society0361-3682published2020-01-012023-12-310Truecc_byx
3Acta Astronautica0094-5765published2020-01-012023-12-310Truecc_byx
4Acta Biomaterialia1742-7061published2020-01-012023-12-310Truecc_byx
..............................
4485Wound Medicine2213-9095published2020-01-012023-12-310Truecc_by_nc_ndx
4486Zeitschrift fuer Evidenz, Fortbildung und Qual...1865-9217published2020-01-012023-12-310Truecc_by_nc_ndx
4487Zeitschrift fuer Medizinische Physik0939-3889published2020-01-012023-12-310Truecc_by_nc_ndx
4488Zoologischer Anzeiger0044-5231published2020-01-012023-12-310Truecc_by_nc_ndx
4489Zoology0944-2006published2020-01-012023-12-310Truecc_by_nc_ndx
-

4490 rows × 9 columns

-
- - - - -```python -# ouvrir la liste des journaux Springer Nature -springer = pd.read_excel('agreements/Springer_titlelist_publication.xlsx', skiprows=7) -springer -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TitleISSNURL
03 Biotech2190-5738https://www.springer.com/journal/13205
14OR1614-2411https://www.springer.com/journal/10288
2AAPS PharmSciTech1530-9932https://www.springer.com/journal/12249
3Abdominal Radiology2366-0058https://www.springer.com/journal/261
4Abhandlungen aus dem Mathematischen Seminar de...1865-8784https://www.springer.com/journal/12188
............
2035Zeitschrift für Religion, Gesellschaft und Pol...2510-1226https://www.springer.com/journal/41682
2036Zeitschrift für Rheumatologie1435-1250https://www.springer.com/journal/393
2037Zeitschrift für Vergleichende Politikwissenschaft1865-2654https://www.springer.com/journal/12286
2038Zentralblatt für Arbeitsmedizin, Arbeitsschutz...2198-0713https://www.springer.com/journal/40664
2039Zoomorphology1432-234Xhttps://www.springer.com/journal/435
-

2040 rows × 3 columns

-
- - - - -```python -# ajout du champ license -# cc_by, cc_by_nc -springer['article_version'] = 'published' -springer['license'] = 'cc_by' -springer['Springer Nature'] = 'x' -# ajout des dates -springer['valid_from'] = '2020-01-01' -springer['valid_until'] = '2022-12-31' -# ajout du embargo et archiving -springer['embargo_months'] = 0 -springer['archiving'] = True -``` - - -```python -# append -rp = rp.append(springer, ignore_index=True) -springer['license'] = 'cc_by_nc' -rp = rp.append(springer, ignore_index=True) -rp -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py:7123: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version - of pandas will change to not sort by default. - - To accept the future behavior, pass 'sort=False'. - - To retain the current behavior and silence the warning, pass 'sort=True'. - - sort=sort, - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ElsevierISSNSpringer NatureTitleURLarchivingarticle_versionembargo_monthslicensevalid_fromvalid_until
0x1876-2859NaNAcademic PediatricsNaNTruepublished0cc_by2020-01-012023-12-31
1x0001-4575NaNAccident Analysis and PreventionNaNTruepublished0cc_by2020-01-012023-12-31
2x0361-3682NaNAccounting, Organizations and SocietyNaNTruepublished0cc_by2020-01-012023-12-31
3x0094-5765NaNActa AstronauticaNaNTruepublished0cc_by2020-01-012023-12-31
4x1742-7061NaNActa BiomaterialiaNaNTruepublished0cc_by2020-01-012023-12-31
....................................
8565NaN2510-1226xZeitschrift für Religion, Gesellschaft und Pol...https://www.springer.com/journal/41682Truepublished0cc_by_nc2020-01-012022-12-31
8566NaN1435-1250xZeitschrift für Rheumatologiehttps://www.springer.com/journal/393Truepublished0cc_by_nc2020-01-012022-12-31
8567NaN1865-2654xZeitschrift für Vergleichende Politikwissenschafthttps://www.springer.com/journal/12286Truepublished0cc_by_nc2020-01-012022-12-31
8568NaN2198-0713xZentralblatt für Arbeitsmedizin, Arbeitsschutz...https://www.springer.com/journal/40664Truepublished0cc_by_nc2020-01-012022-12-31
8569NaN1432-234XxZoomorphologyhttps://www.springer.com/journal/435Truepublished0cc_by_nc2020-01-012022-12-31
-

8570 rows × 11 columns

-
- - - - -```python -# ouvrir la liste des journaux Wiley -wiley = pd.read_excel('agreements/Wiley_titlelist_publish.xlsx', skiprows=7) -wiley -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TitleISSNURL
0ABACUS1467-6281https://onlinelibrary.wiley.com/journal/14676281
1ACADEMIC EMERGENCY MEDICINE1553-2712https://onlinelibrary.wiley.com/journal/15532712
2ACCOUNTING & FINANCE1467-629Xhttps://onlinelibrary.wiley.com/journal/1467629X
3ACCOUNTING PERSPECTIVES1911-3838https://onlinelibrary.wiley.com/journal/19113838
4ACTA ANAESTHESIOLOGICA SCANDINAVICA1399-6576https://onlinelibrary.wiley.com/journal/13996576
............
1391ZEITSCHRIFT FüR ANORGANISCHE UND ALLGEMEINE CH...1521-3749https://onlinelibrary.wiley.com/journal/15213749
1392ZOO BIOLOGY1098-2361https://onlinelibrary.wiley.com/journal/10982361
1393ZOOLOGICA SCRIPTA1463-6409https://onlinelibrary.wiley.com/journal/14636409
1394ZOONOSES AND PUBLIC HEALTH1863-2378https://onlinelibrary.wiley.com/journal/18632378
1395ZYGON® JOURNAL OF RELIGION AND SCIENCE1467-9744https://onlinelibrary.wiley.com/journal/14679744
-

1396 rows × 3 columns

-
- - - - -```python -# ajout du champ license -# cc_by, cc_by_nc, cc_by_nc_nd -wiley['article_version'] = 'published' -wiley['license'] = 'cc_by' -wiley['Wiley'] = 'x' -# ajout des dates -wiley['valid_from'] = '2021-01-01' -wiley['valid_until'] = '2024-12-31' -# ajout du embargo et archiving -wiley['embargo_months'] = 0 -wiley['archiving'] = True -rp = rp.append(wiley, ignore_index=True) -# append avec une autre licence -wiley['license'] = 'cc_by_nc' -rp = rp.append(wiley, ignore_index=True) -# append avec une autre licence -wiley['license'] = 'cc_by_nc_nd' -rp = rp.append(wiley, ignore_index=True) -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ElsevierISSNSpringer NatureTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_until
0x1876-2859NaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-31
1x0001-4575NaNAccident Analysis and PreventionNaNNaNTruepublished0cc_by2020-01-012023-12-31
2x0361-3682NaNAccounting, Organizations and SocietyNaNNaNTruepublished0cc_by2020-01-012023-12-31
3x0094-5765NaNActa AstronauticaNaNNaNTruepublished0cc_by2020-01-012023-12-31
4x1742-7061NaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-31
.......................................
12753NaN1521-3749NaNZEITSCHRIFT FüR ANORGANISCHE UND ALLGEMEINE CH...https://onlinelibrary.wiley.com/journal/15213749xTruepublished0cc_by_nc_nd2021-01-012024-12-31
12754NaN1098-2361NaNZOO BIOLOGYhttps://onlinelibrary.wiley.com/journal/10982361xTruepublished0cc_by_nc_nd2021-01-012024-12-31
12755NaN1463-6409NaNZOOLOGICA SCRIPTAhttps://onlinelibrary.wiley.com/journal/14636409xTruepublished0cc_by_nc_nd2021-01-012024-12-31
12756NaN1863-2378NaNZOONOSES AND PUBLIC HEALTHhttps://onlinelibrary.wiley.com/journal/18632378xTruepublished0cc_by_nc_nd2021-01-012024-12-31
12757NaN1467-9744NaNZYGON® JOURNAL OF RELIGION AND SCIENCEhttps://onlinelibrary.wiley.com/journal/14679744xTruepublished0cc_by_nc_nd2021-01-012024-12-31
-

12758 rows × 12 columns

-
- - - - -```python -# ouvrir la liste des journaux TF -tf = pd.read_excel('agreements/TandF_titlelist_publish.xlsx', skiprows=7) -tf -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TitleISSN
0a/b: Auto/Biography Studies2151-7290
1Accountability in Research1545-5815
2Accounting and Business Research2159-4260
3Accounting Education1468-4489
4Accounting Forum1467-6303
.........
2401Writing Systems ResearchNaN
2402Xenobiotica1366-5928
2403Yorkshire Archaeological Journal2045-0664
2404Youth Theatre Journal1948-4798
2405Zoology in the Middle East2326-2680
-

2406 rows × 2 columns

-
- - - - -```python -# ajout du champ license -# cc_by, cc_by_nc, cc_by_nc_nd -tf['article_version'] = 'published' -tf['license'] = 'cc_by' -tf['TF'] = 'x' -# ajout des dates -tf['valid_from'] = '2021-01-01' -tf['valid_until'] = '2023-12-31' -# ajout du embargo et archiving -tf['embargo_months'] = 0 -tf['archiving'] = True -``` - - -```python -# append -rp = rp.append(tf, ignore_index=True) -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ElsevierISSNSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_until
0x1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-31
1x0001-4575NaNNaNAccident Analysis and PreventionNaNNaNTruepublished0cc_by2020-01-012023-12-31
2x0361-3682NaNNaNAccounting, Organizations and SocietyNaNNaNTruepublished0cc_by2020-01-012023-12-31
3x0094-5765NaNNaNActa AstronauticaNaNNaNTruepublished0cc_by2020-01-012023-12-31
4x1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-31
..........................................
15159NaNNaNNaNxWriting Systems ResearchNaNNaNTruepublished0cc_by2021-01-012023-12-31
15160NaN1366-5928NaNxXenobioticaNaNNaNTruepublished0cc_by2021-01-012023-12-31
15161NaN2045-0664NaNxYorkshire Archaeological JournalNaNNaNTruepublished0cc_by2021-01-012023-12-31
15162NaN1948-4798NaNxYouth Theatre JournalNaNNaNTruepublished0cc_by2021-01-012023-12-31
15163NaN2326-2680NaNxZoology in the Middle EastNaNNaNTruepublished0cc_by2021-01-012023-12-31
-

15164 rows × 13 columns

-
- - - - -```python -# ouvrir la liste des journaux CUP -cup = pd.read_excel('agreements/CUP_Journals_titlelist_publish.xlsx', skiprows=7) -cup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Titlee-ISSNURL
0Agricultural and Resource Economics Review2372-2614http://www.cambridge.org/core/product/identifi...
1AJIL Unbound2398-7723http://www.cambridge.org/core/product/identifi...
2Annals of Glaciology1727-5644http://www.cambridge.org/core/product/identifi...
3APSIPA Transactions on Signal and Information ...2048-7703http://www.cambridge.org/core/product/identifi...
4Biological Imaging2633-903Xhttp://www.cambridge.org/core/product/identifi...
............
366Visual Neuroscience1469-8714http://www.cambridge.org/core/product/identifi...
367Weed Science1550-2759http://www.cambridge.org/core/product/identifi...
368Weed Technology1550-2740http://www.cambridge.org/core/product/identifi...
369World Trade Review1475-3138http://www.cambridge.org/core/product/identifi...
370Zygote1469-8730http://www.cambridge.org/core/product/identifi...
-

371 rows × 3 columns

-
- - - - -```python -# renommer l'ISSN -cup = cup.rename(columns = {'e-ISSN' : 'ISSN'}) -cup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
TitleISSNURL
0Agricultural and Resource Economics Review2372-2614http://www.cambridge.org/core/product/identifi...
1AJIL Unbound2398-7723http://www.cambridge.org/core/product/identifi...
2Annals of Glaciology1727-5644http://www.cambridge.org/core/product/identifi...
3APSIPA Transactions on Signal and Information ...2048-7703http://www.cambridge.org/core/product/identifi...
4Biological Imaging2633-903Xhttp://www.cambridge.org/core/product/identifi...
............
366Visual Neuroscience1469-8714http://www.cambridge.org/core/product/identifi...
367Weed Science1550-2759http://www.cambridge.org/core/product/identifi...
368Weed Technology1550-2740http://www.cambridge.org/core/product/identifi...
369World Trade Review1475-3138http://www.cambridge.org/core/product/identifi...
370Zygote1469-8730http://www.cambridge.org/core/product/identifi...
-

371 rows × 3 columns

-
- - - - -```python -# ajout du champ license -# cc_by, cc_by_nc, cc_by_nc_nd, cc_by_nc_sa -cup['article_version'] = 'published' -cup['license'] = 'cc_by' -cup['CUP'] = 'x' -# ajout des dates -cup['valid_from'] = '2021-01-01' -cup['valid_until'] = '2023-12-31' -# ajout du embargo et archiving -cup['embargo_months'] = 60 -cup['archiving'] = True -``` - - -```python -# append -rp = rp.append(cup, ignore_index=True) -cup['license'] = 'cc_by_nc' -rp = rp.append(cup, ignore_index=True) -cup['license'] = 'cc_by_nc_nd' -rp = rp.append(cup, ignore_index=True) -cup['license'] = 'cc_by_nc_sa' -rp = rp.append(cup, ignore_index=True) -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierISSNSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_until
0NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-31
1NaNx0001-4575NaNNaNAccident Analysis and PreventionNaNNaNTruepublished0cc_by2020-01-012023-12-31
2NaNx0361-3682NaNNaNAccounting, Organizations and SocietyNaNNaNTruepublished0cc_by2020-01-012023-12-31
3NaNx0094-5765NaNNaNActa AstronauticaNaNNaNTruepublished0cc_by2020-01-012023-12-31
4NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-31
.............................................
16643xNaN1469-8714NaNNaNVisual Neurosciencehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-31
16644xNaN1550-2759NaNNaNWeed Sciencehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-31
16645xNaN1550-2740NaNNaNWeed Technologyhttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-31
16646xNaN1475-3138NaNNaNWorld Trade Reviewhttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-31
16647xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-31
-

16648 rows × 14 columns

-
- - - - -```python -# test des lignes sans embargo -rp.loc[rp['embargo_months'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierISSNSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_until
-
- - - - -```python -# ajout des ISSN-L -issnl = pd.read_csv('issn/20171102.ISSN-to-ISSN-L.txt', encoding='utf-8', header=0, sep='\t') -issnl -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ISSNISSN-L
00000-00190000-0019
10000-00270000-0027
20000-00430000-0043
30000-00510000-0051
40000-006X0000-006X
.........
19959138756-99578756-9957
19959148756-99658756-9965
19959158756-99738756-9973
19959168756-99818756-9981
19959178756-999X8756-999X
-

1995918 rows × 2 columns

-
- - - - -```python -# renommer les colonnes -issnl = issnl.rename(columns={'ISSN' : 'issn', 'ISSN-L' : 'issnl'}) -rp = rp.rename(columns={'ISSN' : 'issn'}) -``` - - -```python -# merge -rp = pd.merge(rp, issnl, on='issn', how='left') -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnl
0NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859
1NaNx0001-4575NaNNaNAccident Analysis and PreventionNaNNaNTruepublished0cc_by2020-01-012023-12-310001-4575
2NaNx0361-3682NaNNaNAccounting, Organizations and SocietyNaNNaNTruepublished0cc_by2020-01-012023-12-310361-3682
3NaNx0094-5765NaNNaNActa AstronauticaNaNNaNTruepublished0cc_by2020-01-012023-12-310094-5765
4NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061
................................................
16643xNaN1469-8714NaNNaNVisual Neurosciencehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310952-5238
16644xNaN1550-2759NaNNaNWeed Sciencehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310043-1745
16645xNaN1550-2740NaNNaNWeed Technologyhttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310890-037X
16646xNaN1475-3138NaNNaNWorld Trade Reviewhttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311474-7456
16647xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994
-

16648 rows × 15 columns

-
- - - - -```python -# cummuler les issns pour le merge -# rp_1 = rp.loc[rp['issnl'].notna()][['issnl', 'article_version', 'license', 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP']] -# rp_1 = rp_1.rename(columns = {'issnl' : 'issn'}) -# rp_2 = rp.loc[rp['issn'].notna()][['issn', 'article_version', 'license', 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP']] -# rp_all = rp_1.append(rp_2, ignore_index=True) -rp_all = rp -``` - - -```python -# ajouter les champs manquants -# valeur discount (id 2) à 100% pour les licences read & publish -# elsevier['amount'] = 100 -# elsevier['symbol'] = '%' -# elsevier['cost_factor_type'] = 2 -# elsevier['comment'] = 'Source: swissuniversities' -# elsevier -``` - - -```python -# merge avec les organisations -# 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP' -participants_elsevier = participants.loc[participants['Elsevier'].notna()][['Elsevier', 'ROR']] -rp_elsevier = rp_all.loc[rp_all['Elsevier'].notna()] -rp_1 = pd.merge(rp_elsevier, participants_elsevier, on='Elsevier', how='outer') -rp_1 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlROR
0NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/04d8ztx87
1NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/02bnkt322
2NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/00zg4za48
3NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/02s376052
4NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/05a28rw58
...................................................
197555NaNx0944-2006NaNNaNZoologyNaNNaNTruepublished0cc_by_nc_nd2020-01-012023-12-310944-2006https://ror.org/01swzsf04
197556NaNx0944-2006NaNNaNZoologyNaNNaNTruepublished0cc_by_nc_nd2020-01-012023-12-310944-2006https://ror.org/019whta54
197557NaNx0944-2006NaNNaNZoologyNaNNaNTruepublished0cc_by_nc_nd2020-01-012023-12-310944-2006https://ror.org/00vasag41
197558NaNx0944-2006NaNNaNZoologyNaNNaNTruepublished0cc_by_nc_nd2020-01-012023-12-310944-2006https://ror.org/05r0ap620
197559NaNx0944-2006NaNNaNZoologyNaNNaNTruepublished0cc_by_nc_nd2020-01-012023-12-310944-2006https://ror.org/05pmsvm27
-

197560 rows × 16 columns

-
- - - - -```python -rp_elsevier -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnl
0NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859
1NaNx0001-4575NaNNaNAccident Analysis and PreventionNaNNaNTruepublished0cc_by2020-01-012023-12-310001-4575
2NaNx0361-3682NaNNaNAccounting, Organizations and SocietyNaNNaNTruepublished0cc_by2020-01-012023-12-310361-3682
3NaNx0094-5765NaNNaNActa AstronauticaNaNNaNTruepublished0cc_by2020-01-012023-12-310094-5765
4NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061
................................................
4485NaNx2213-9095NaNNaNWound MedicineNaNNaNTruepublished0cc_by_nc_nd2020-01-012023-12-312213-9095
4486NaNx1865-9217NaNNaNZeitschrift fuer Evidenz, Fortbildung und Qual...NaNNaNTruepublished0cc_by_nc_nd2020-01-012023-12-311865-9217
4487NaNx0939-3889NaNNaNZeitschrift fuer Medizinische PhysikNaNNaNTruepublished0cc_by_nc_nd2020-01-012023-12-310939-3889
4488NaNx0044-5231NaNNaNZoologischer AnzeigerNaNNaNTruepublished0cc_by_nc_nd2020-01-012023-12-310044-5231
4489NaNx0944-2006NaNNaNZoologyNaNNaNTruepublished0cc_by_nc_nd2020-01-012023-12-310944-2006
-

4490 rows × 15 columns

-
- - - - -```python -participants_elsevier -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ElsevierROR
0xhttps://ror.org/04d8ztx87
1xhttps://ror.org/02bnkt322
3xhttps://ror.org/00zg4za48
4xhttps://ror.org/02s376052
5xhttps://ror.org/05a28rw58
6xhttps://ror.org/032ymzc07
7xhttps://ror.org/04mq2g308
8xhttps://ror.org/0210tb741
9xhttps://ror.org/007ygn379
10xhttps://ror.org/01xkakk17
11xhttps://ror.org/015pmkr43
12xhttps://ror.org/048gre751
13xhttps://ror.org/01bvm0h13
14xhttps://ror.org/02ejkey04
15xhttps://ror.org/04nd0xd48
16xhttps://ror.org/00w9q2c06
17xhttps://ror.org/049c2kr37
20xhttps://ror.org/00p9jf779
21xhttps://ror.org/038mj2660
22xhttps://ror.org/01awgk221
23xhttps://ror.org/05jf1ma54
24xhttps://ror.org/02fjgft97
25xhttps://ror.org/0235ynq74
26xhttps://ror.org/03fs41j10
27xhttps://ror.org/00rqdn375
28xhttps://ror.org/05m37v666
29xhttps://ror.org/04bf6dq94
30xhttps://ror.org/040gs8e06
31xhttps://ror.org/05ghhx264
32xhttps://ror.org/03mcsbr76
33xhttps://ror.org/05ep8g269
34xhttps://ror.org/03c4atk17
35xhttps://ror.org/02s6k3f65
36xhttps://ror.org/02k7v4d05
37xhttps://ror.org/01qjrx392
38xhttps://ror.org/00kgrkn83
39xhttps://ror.org/0561a3s31
40xhttps://ror.org/02crff812
41xhttps://ror.org/022fs9h90
42xhttps://ror.org/01swzsf04
43xhttps://ror.org/019whta54
44xhttps://ror.org/00vasag41
45xhttps://ror.org/05r0ap620
46xhttps://ror.org/05pmsvm27
-
- - - - -```python -# merge avec les organisations -# 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP' -participants_springer = participants.loc[participants['Springer Nature'].notna()][['Springer Nature', 'ROR']] -rp_springer = rp_all.loc[rp_all['Springer Nature'].notna()] -rp_2 = pd.merge(rp_springer, participants_springer, on='Springer Nature', how='outer') -rp_2 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlROR
0NaNNaN2190-5738xNaN3 Biotechhttps://www.springer.com/journal/13205NaNTruepublished0cc_by2020-01-012022-12-312190-5738https://ror.org/04d8ztx87
1NaNNaN2190-5738xNaN3 Biotechhttps://www.springer.com/journal/13205NaNTruepublished0cc_by2020-01-012022-12-312190-5738https://ror.org/02bnkt322
2NaNNaN2190-5738xNaN3 Biotechhttps://www.springer.com/journal/13205NaNTruepublished0cc_by2020-01-012022-12-312190-5738https://ror.org/01ggx4157
3NaNNaN2190-5738xNaN3 Biotechhttps://www.springer.com/journal/13205NaNTruepublished0cc_by2020-01-012022-12-312190-5738https://ror.org/00zg4za48
4NaNNaN2190-5738xNaN3 Biotechhttps://www.springer.com/journal/13205NaNTruepublished0cc_by2020-01-012022-12-312190-5738https://ror.org/02s376052
...................................................
187675NaNNaN1432-234XxNaNZoomorphologyhttps://www.springer.com/journal/435NaNTruepublished0cc_by_nc2020-01-012022-12-310720-213Xhttps://ror.org/01swzsf04
187676NaNNaN1432-234XxNaNZoomorphologyhttps://www.springer.com/journal/435NaNTruepublished0cc_by_nc2020-01-012022-12-310720-213Xhttps://ror.org/019whta54
187677NaNNaN1432-234XxNaNZoomorphologyhttps://www.springer.com/journal/435NaNTruepublished0cc_by_nc2020-01-012022-12-310720-213Xhttps://ror.org/00vasag41
187678NaNNaN1432-234XxNaNZoomorphologyhttps://www.springer.com/journal/435NaNTruepublished0cc_by_nc2020-01-012022-12-310720-213Xhttps://ror.org/05r0ap620
187679NaNNaN1432-234XxNaNZoomorphologyhttps://www.springer.com/journal/435NaNTruepublished0cc_by_nc2020-01-012022-12-310720-213Xhttps://ror.org/05pmsvm27
-

187680 rows × 16 columns

-
- - - - -```python -# merge avec les organisations -# 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP' -participants_wiley = participants.loc[participants['Wiley'].notna()][['Wiley', 'ROR']] -rp_wiley = rp_all.loc[rp_all['Wiley'].notna()] -rp_3 = pd.merge(rp_wiley, participants_wiley, on='Wiley', how='outer') -rp_3 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlROR
0NaNNaN1467-6281NaNNaNABACUShttps://onlinelibrary.wiley.com/journal/14676281xTruepublished0cc_by2021-01-012024-12-310001-3072https://ror.org/04d8ztx87
1NaNNaN1467-6281NaNNaNABACUShttps://onlinelibrary.wiley.com/journal/14676281xTruepublished0cc_by2021-01-012024-12-310001-3072https://ror.org/02bnkt322
2NaNNaN1467-6281NaNNaNABACUShttps://onlinelibrary.wiley.com/journal/14676281xTruepublished0cc_by2021-01-012024-12-310001-3072https://ror.org/01ggx4157
3NaNNaN1467-6281NaNNaNABACUShttps://onlinelibrary.wiley.com/journal/14676281xTruepublished0cc_by2021-01-012024-12-310001-3072https://ror.org/00zg4za48
4NaNNaN1467-6281NaNNaNABACUShttps://onlinelibrary.wiley.com/journal/14676281xTruepublished0cc_by2021-01-012024-12-310001-3072https://ror.org/02s376052
...................................................
188455NaNNaN1467-9744NaNNaNZYGON® JOURNAL OF RELIGION AND SCIENCEhttps://onlinelibrary.wiley.com/journal/14679744xTruepublished0cc_by_nc_nd2021-01-012024-12-310591-2385https://ror.org/01swzsf04
188456NaNNaN1467-9744NaNNaNZYGON® JOURNAL OF RELIGION AND SCIENCEhttps://onlinelibrary.wiley.com/journal/14679744xTruepublished0cc_by_nc_nd2021-01-012024-12-310591-2385https://ror.org/019whta54
188457NaNNaN1467-9744NaNNaNZYGON® JOURNAL OF RELIGION AND SCIENCEhttps://onlinelibrary.wiley.com/journal/14679744xTruepublished0cc_by_nc_nd2021-01-012024-12-310591-2385https://ror.org/00vasag41
188458NaNNaN1467-9744NaNNaNZYGON® JOURNAL OF RELIGION AND SCIENCEhttps://onlinelibrary.wiley.com/journal/14679744xTruepublished0cc_by_nc_nd2021-01-012024-12-310591-2385https://ror.org/05r0ap620
188459NaNNaN1467-9744NaNNaNZYGON® JOURNAL OF RELIGION AND SCIENCEhttps://onlinelibrary.wiley.com/journal/14679744xTruepublished0cc_by_nc_nd2021-01-012024-12-310591-2385https://ror.org/05pmsvm27
-

188460 rows × 16 columns

-
- - - - -```python -rp_wiley -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnl
8570NaNNaN1467-6281NaNNaNABACUShttps://onlinelibrary.wiley.com/journal/14676281xTruepublished0cc_by2021-01-012024-12-310001-3072
8571NaNNaN1553-2712NaNNaNACADEMIC EMERGENCY MEDICINEhttps://onlinelibrary.wiley.com/journal/15532712xTruepublished0cc_by2021-01-012024-12-311069-6563
8572NaNNaN1467-629XNaNNaNACCOUNTING & FINANCEhttps://onlinelibrary.wiley.com/journal/1467629XxTruepublished0cc_by2021-01-012024-12-310810-5391
8573NaNNaN1911-3838NaNNaNACCOUNTING PERSPECTIVEShttps://onlinelibrary.wiley.com/journal/19113838xTruepublished0cc_by2021-01-012024-12-311911-382X
8574NaNNaN1399-6576NaNNaNACTA ANAESTHESIOLOGICA SCANDINAVICAhttps://onlinelibrary.wiley.com/journal/13996576xTruepublished0cc_by2021-01-012024-12-310001-5172
................................................
12753NaNNaN1521-3749NaNNaNZEITSCHRIFT FüR ANORGANISCHE UND ALLGEMEINE CH...https://onlinelibrary.wiley.com/journal/15213749xTruepublished0cc_by_nc_nd2021-01-012024-12-310044-2313
12754NaNNaN1098-2361NaNNaNZOO BIOLOGYhttps://onlinelibrary.wiley.com/journal/10982361xTruepublished0cc_by_nc_nd2021-01-012024-12-310733-3188
12755NaNNaN1463-6409NaNNaNZOOLOGICA SCRIPTAhttps://onlinelibrary.wiley.com/journal/14636409xTruepublished0cc_by_nc_nd2021-01-012024-12-310300-3256
12756NaNNaN1863-2378NaNNaNZOONOSES AND PUBLIC HEALTHhttps://onlinelibrary.wiley.com/journal/18632378xTruepublished0cc_by_nc_nd2021-01-012024-12-311863-1959
12757NaNNaN1467-9744NaNNaNZYGON® JOURNAL OF RELIGION AND SCIENCEhttps://onlinelibrary.wiley.com/journal/14679744xTruepublished0cc_by_nc_nd2021-01-012024-12-310591-2385
-

4188 rows × 15 columns

-
- - - - -```python -# merge avec les organisations -# 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP' -participants_tf = participants.loc[participants['TF'].notna()][['TF', 'ROR']] -rp_tf = rp_all.loc[rp_all['TF'].notna()] -rp_4 = pd.merge(rp_tf, participants_tf, on='TF', how='outer') -rp_4 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlROR
0NaNNaN2151-7290NaNxa/b: Auto/Biography StudiesNaNNaNTruepublished0cc_by2021-01-012023-12-310898-9575https://ror.org/04d8ztx87
1NaNNaN2151-7290NaNxa/b: Auto/Biography StudiesNaNNaNTruepublished0cc_by2021-01-012023-12-310898-9575https://ror.org/02bnkt322
2NaNNaN2151-7290NaNxa/b: Auto/Biography StudiesNaNNaNTruepublished0cc_by2021-01-012023-12-310898-9575https://ror.org/01ggx4157
3NaNNaN2151-7290NaNxa/b: Auto/Biography StudiesNaNNaNTruepublished0cc_by2021-01-012023-12-310898-9575https://ror.org/00zg4za48
4NaNNaN2151-7290NaNxa/b: Auto/Biography StudiesNaNNaNTruepublished0cc_by2021-01-012023-12-310898-9575https://ror.org/02s376052
...................................................
110671NaNNaN2326-2680NaNxZoology in the Middle EastNaNNaNTruepublished0cc_by2021-01-012023-12-310939-7140https://ror.org/01swzsf04
110672NaNNaN2326-2680NaNxZoology in the Middle EastNaNNaNTruepublished0cc_by2021-01-012023-12-310939-7140https://ror.org/019whta54
110673NaNNaN2326-2680NaNxZoology in the Middle EastNaNNaNTruepublished0cc_by2021-01-012023-12-310939-7140https://ror.org/00vasag41
110674NaNNaN2326-2680NaNxZoology in the Middle EastNaNNaNTruepublished0cc_by2021-01-012023-12-310939-7140https://ror.org/05r0ap620
110675NaNNaN2326-2680NaNxZoology in the Middle EastNaNNaNTruepublished0cc_by2021-01-012023-12-310939-7140https://ror.org/05pmsvm27
-

110676 rows × 16 columns

-
- - - - -```python -# merge avec les organisations -# 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP' -participants_cup = participants.loc[participants['CUP'].notna()][['CUP', 'ROR']] -rp_cup = rp_all.loc[rp_all['CUP'].notna()] -rp_5 = pd.merge(rp_cup, participants_cup, on='CUP', how='outer') -rp_5 -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlROR
0xNaN2372-2614NaNNaNAgricultural and Resource Economics Reviewhttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by2021-01-012023-12-311068-2805https://ror.org/04d8ztx87
1xNaN2372-2614NaNNaNAgricultural and Resource Economics Reviewhttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by2021-01-012023-12-311068-2805https://ror.org/02bnkt322
2xNaN2372-2614NaNNaNAgricultural and Resource Economics Reviewhttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by2021-01-012023-12-311068-2805https://ror.org/01ggx4157
3xNaN2372-2614NaNNaNAgricultural and Resource Economics Reviewhttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by2021-01-012023-12-311068-2805https://ror.org/00zg4za48
4xNaN2372-2614NaNNaNAgricultural and Resource Economics Reviewhttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by2021-01-012023-12-311068-2805https://ror.org/02s376052
...................................................
68259xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/01swzsf04
68260xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/019whta54
68261xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/00vasag41
68262xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/05r0ap620
68263xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/05pmsvm27
-

68264 rows × 16 columns

-
- - - - -```python -# concat des 5 -rp_fin = rp_1.append(rp_2, ignore_index=True) -rp_fin = rp_fin.append(rp_3, ignore_index=True) -rp_fin = rp_fin.append(rp_4, ignore_index=True) -rp_fin = rp_fin.append(rp_5, ignore_index=True) -rp_fin -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlROR
0NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/04d8ztx87
1NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/02bnkt322
2NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/00zg4za48
3NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/02s376052
4NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/05a28rw58
...................................................
752635xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/01swzsf04
752636xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/019whta54
752637xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/00vasag41
752638xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/05r0ap620
752639xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/05pmsvm27
-

752640 rows × 16 columns

-
- - - - -```python -# supprimer les doublons et les vides -rp_fin = rp_fin.dropna(subset=['issn']) -rp_fin = rp_fin.drop_duplicates(subset=['issn', 'license', 'ROR']) -rp_fin -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlROR
0NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/04d8ztx87
1NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/02bnkt322
2NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/00zg4za48
3NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/02s376052
4NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/05a28rw58
...................................................
752635xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/01swzsf04
752636xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/019whta54
752637xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/00vasag41
752638xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/05r0ap620
752639xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/05pmsvm27
-

751628 rows × 16 columns

-
- - - - -```python -# reindex et ajout de l'id avec l'index + 1 -rp_fin = rp_fin.reset_index() -del rp_fin['index'] -rp_fin = rp_fin.reset_index() -rp_fin['rp_id'] = rp_fin.index + 1 -rp_fin -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
indexCUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORrp_id
00NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/04d8ztx871
11NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/02bnkt3222
22NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/00zg4za483
33NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/02s3760524
44NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/05a28rw585
.........................................................
751623751623xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/01swzsf04751624
751624751624xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/019whta54751625
751625751625xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/00vasag41751626
751626751626xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/05r0ap620751627
751627751627xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/05pmsvm27751628
-

751628 rows × 18 columns

-
- - - - -```python -rp_fin['embargo_months'].value_counts() -``` - - - - - 0 683364 - 60 68264 - Name: embargo_months, dtype: int64 - - - - -```python -# test des lignes sans embargo -rp_fin.loc[rp_fin['embargo_months'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - -
indexCUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORrp_id
-
- - - - -```python -issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnjournalissn_type
010001-28155321
121399-00395322
230001-48424981
341520-48984982
450001-49667891
...............
175517562470-00455333
175617572470-00535332
175717582475-99536082
175817592504-44279941
175917602504-44359943
-

1760 rows × 4 columns

-
- - - - -```python -# merge pour avoir l'issnl -issn = pd.merge(issn, issnl, on='issn', how='left') -issn -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissnjournalissn_typeissnl
010001-281553210001-2815
121399-003953220001-2815
230001-484249810001-4842
341520-489849820001-4842
450001-496678910001-4966
..................
175517562470-004553332470-0045
175617572470-005353322470-0045
175717582475-995360822475-9953
175817592504-442799412504-4427
175917602504-443599432504-4427
-

1760 rows × 5 columns

-
- - - - -```python -issn.loc[issn['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - -
idissnjournalissn_typeissnl
-
- - - - -```python -# merge dans l'autre sens pour garder que les lignes du fichier -rp_fin = pd.merge(rp_fin, issn[['id', 'journal', 'issnl']], on='issnl', how='left') -rp_fin -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
indexCUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORrp_ididjournal
00NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/04d8ztx871NaNNaN
11NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/02bnkt3222NaNNaN
22NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/00zg4za483NaNNaN
33NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/02s3760524NaNNaN
44NaNx1876-2859NaNNaNAcademic PediatricsNaNNaNTruepublished0cc_by2020-01-012023-12-311876-2859https://ror.org/05a28rw585NaNNaN
...............................................................
792211751623xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/01swzsf04751624NaNNaN
792212751624xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/019whta54751625NaNNaN
792213751625xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/00vasag41751626NaNNaN
792214751626xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/05r0ap620751627NaNNaN
792215751627xNaN1469-8730NaNNaNZygotehttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-310967-1994https://ror.org/05pmsvm27751628NaNNaN
-

792216 rows × 20 columns

-
- - - - -```python -# test des lignes sans embargo -rp_fin.loc[rp_fin['embargo_months'].isna() & rp_fin['id'].notna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
indexCUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORrp_ididjournal
-
- - - - -```python -# garder les lignes avec merge -rp_fin_merge = rp_fin.loc[rp_fin['id'].notna()] -rp_fin_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
indexCUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORrp_ididjournal
176176NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx871771623.0899.0
177176NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx871771624.0899.0
178177NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt3221781623.0899.0
179177NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt3221781624.0899.0
180178NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za481791623.0899.0
...............................................................
788071747485xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag417474861419.0592.0
788072747486xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap6207474871418.0592.0
788073747486xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap6207474871419.0592.0
788074747487xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm277474881418.0592.0
788075747487xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm277474881419.0592.0
-

80671 rows × 20 columns

-
- - - - -```python -# supprimer les doublons et les vides -rp_fin_merge = rp_fin_merge.drop_duplicates(subset=['rp_id']) -rp_fin_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
indexCUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORrp_ididjournal
176176NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx871771623.0899.0
178177NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt3221781623.0899.0
180178NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za481791623.0899.0
182179NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s3760521801623.0899.0
184180NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw581811623.0899.0
...............................................................
788066747483xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf047474841418.0592.0
788068747484xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta547474851418.0592.0
788070747485xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag417474861418.0592.0
788072747486xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap6207474871418.0592.0
788074747487xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm277474881418.0592.0
-

40083 rows × 20 columns

-
- - - - -```python -# test des lignes sans journal -rp_fin_merge.loc[rp_fin_merge['journal'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
indexCUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORrp_ididjournal
-
- - - - -```python -# convertir l'index en id -del rp_fin_merge['id'] -del rp_fin_merge['index'] -del rp_fin_merge['rp_id'] -rp_fin_merge = rp_fin_merge.reset_index() -# ajout de l'id avec l'index + 1 -rp_fin_merge['rp_id'] = rp_fin_merge['index'] + 1 -del rp_fin_merge['index'] -rp_fin_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORjournalrp_id
0NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.0177
1NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.0179
2NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.0181
3NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.0183
4NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.0185
.........................................................
40078xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.0788067
40079xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.0788069
40080xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.0788071
40081xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.0788073
40082xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.0788075
-

40083 rows × 18 columns

-
- - - - -```python -# convertir l'index en id -del rp_fin_merge['rp_id'] -rp_fin_merge = rp_fin_merge.reset_index() -# ajout de l'id avec l'index + 1 -rp_fin_merge['rp_id'] = rp_fin_merge['index'] + 1 -del rp_fin_merge['index'] -rp_fin_merge -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORjournalrp_id
0NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01
1NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02
2NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03
3NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04
4NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05
.........................................................
40078xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079
40079xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080
40080xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081
40081xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082
40082xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083
-

40083 rows × 18 columns

-
- - - - -```python -rp_fin_merge['embargo_months'].value_counts() -``` - - - - - 0 39163 - 60 920 - Name: embargo_months, dtype: int64 - - - - -```python -# test des lignes sans embargo -rp_fin_merge.loc[rp_fin_merge['embargo_months'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORjournalrp_id
-
- - - - -```python -# export excel -rp_fin_merge.to_excel('sample/read_publish_brut_merge.xlsx', index=False) -``` - - -```python -# export csv -rp_fin_merge.to_csv('sample/read_publish_brut_merge.tsv', sep='\t', index=False) -``` diff --git a/import_scripts/09_oacct_read_and_publish.py b/import_scripts/09_oacct_read_and_publish.py deleted file mode 100644 index 98ff4da0..00000000 --- a/import_scripts/09_oacct_read_and_publish.py +++ /dev/null @@ -1,607 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet de modifier les données extraites des differentes sources et les exporter dans les tables de l'application OACCT. -# -# Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -# Date de dernière mise à jour : 08.09.2021 - -# In[1]: - - -import pandas as pd -import csv -import json -import numpy as np -import os -# afficher toutes les colonnes -pd.set_option('display.max_columns', None) -# definir le debut des ids -id_start = 1 - - -# ## Ajout des rabais pour les revues des licences Read & Publish -# -# Journals list by publisher : -# * https://consortium.ch/elsevier_titlelist_publication -# * https://consortium.ch/springer_titlelist_publication -# * https://consortium.ch/wiley_titlelist_publish -# * https://consortium.ch/tandf_titlelist_publish -# * https://consortium.ch/sage_titlelist_publish -# * https://consortium.ch/cup_titlelist_publish -# -# Licence term : -# * Elsevier : 2020-2023 -# * Springer Nature : 2020-2022 -# * Wiley : 2021-2024 -# * Taylor & Francis : 2021-2023 -# * Cambridge University Press (CUP) : 2021-2023 -# -# CC licences : -# * Elsevier : CC-BY, CC-BY-NC-ND -# * Springer Nature : CC-BY, CC-BY-NC 
-# * Wiley : CC-BY, CC-BY-NC, CC-BY-NC-ND -# * Taylor & Francis : CC-BY -# * Cambridge University Press (CUP) : CC-BY, CC-BY-NC, CC-BY-NC-ND, CC-BY-NC-SA -# -# Special conditions : -# * Cambridge University Press (CUP) : Only the following article types are covered: Research Articles, Review Articles, Rapid Communication, Brief Reports and Case Reports -# -# - -# ## Import du fichier des issns - -# In[2]: - - -issn = pd.read_csv('sample/issn.tsv', encoding='utf-8', header=0, sep='\t') -issn - - -# In[3]: - - -# open publishers -publisher = pd.read_csv('sample/publisher.tsv', encoding='utf-8', header=0, sep='\t') -publisher - - -# In[4]: - - -publisher.loc[publisher['name'] == 'Elsevier'] - - -# In[5]: - - -publisher.loc[(publisher['name'] == 'Springer Verlag') | (publisher['name'] == 'Nature Research')] - - -# In[6]: - - -publisher.loc[publisher['name'] == 'Wiley'] - - -# In[7]: - - -publisher.loc[publisher['name'] == 'Taylor and Francis'] - - -# In[8]: - - -publisher.loc[publisher['name'] == 'Cambridge University Press'] - - -# In[9]: - - -# ouvrir la liste d'organisations -participants = pd.read_csv('agreements/consortium_institutions_participation_read_and_publish.csv', encoding='utf-8', header=0, sep='\t') -participants - - -# In[10]: - - -# suppression de Lib4RI qui est une bibliothèque -participants = participants.loc[participants['Institution'] != 'Lib4RI'] -participants - - -# In[11]: - - -# ajout de TF et CUP pour tous (TODO : obtenir la liste des bibliothèques pour ces deux licences) -participants['TF'] = 'x' -participants['CUP'] = 'x' -participants - - -# In[12]: - - -# ouvrir la liste des journaux Elsevier -elsevier = pd.read_excel('agreements/Elsevier_titlelist_publication.xlsx', skiprows=7) -elsevier - - -# In[13]: - - -# ajout du champ version -elsevier['article_version'] = 'published' -elsevier - - -# In[14]: - - -# ajout des dates -elsevier['valid_from'] = '2020-01-01' -elsevier['valid_until'] = '2023-12-31' -elsevier - - -# In[15]: - - -# ajout du 
embargo et archiving -elsevier['embargo_months'] = 0 -elsevier['archiving'] = True -elsevier - - -# In[16]: - - -elsevier.iloc[elsevier.shape[0]-1] - - -# In[17]: - - -# ajout du champ license -# cc_by, cc_by_nc_nd -rp = pd.DataFrame() -elsevier['article_version'] = 'published' -elsevier['license'] = 'cc_by' -elsevier['Elsevier'] = 'x' -rp = rp.append(elsevier, ignore_index=True) -elsevier['license'] = 'cc_by_nc_nd' -rp = rp.append(elsevier, ignore_index=True) -rp - - -# In[18]: - - -# ouvrir la liste des journaux Springer Nature -springer = pd.read_excel('agreements/Springer_titlelist_publication.xlsx', skiprows=7) -springer - - -# In[19]: - - -# ajout du champ license -# cc_by, cc_by_nc -springer['article_version'] = 'published' -springer['license'] = 'cc_by' -springer['Springer Nature'] = 'x' -# ajout des dates -springer['valid_from'] = '2020-01-01' -springer['valid_until'] = '2022-12-31' -# ajout du embargo et archiving -springer['embargo_months'] = 0 -springer['archiving'] = True - - -# In[20]: - - -# append -rp = rp.append(springer, ignore_index=True) -springer['license'] = 'cc_by_nc' -rp = rp.append(springer, ignore_index=True) -rp - - -# In[21]: - - -# ouvrir la liste des journaux Wiley -wiley = pd.read_excel('agreements/Wiley_titlelist_publish.xlsx', skiprows=7) -wiley - - -# In[22]: - - -# ajout du champ license -# cc_by, cc_by_nc, cc_by_nc_nd -wiley['article_version'] = 'published' -wiley['license'] = 'cc_by' -wiley['Wiley'] = 'x' -# ajout des dates -wiley['valid_from'] = '2021-01-01' -wiley['valid_until'] = '2024-12-31' -# ajout du embargo et archiving -wiley['embargo_months'] = 0 -wiley['archiving'] = True -rp = rp.append(wiley, ignore_index=True) -# append avec une autre licence -wiley['license'] = 'cc_by_nc' -rp = rp.append(wiley, ignore_index=True) -# append avec une autre licence -wiley['license'] = 'cc_by_nc_nd' -rp = rp.append(wiley, ignore_index=True) -rp - - -# In[23]: - - -# ouvrir la liste des journaux TF -tf = 
pd.read_excel('agreements/TandF_titlelist_publish.xlsx', skiprows=7) -tf - - -# In[24]: - - -# ajout du champ license -# cc_by, cc_by_nc, cc_by_nc_nd -tf['article_version'] = 'published' -tf['license'] = 'cc_by' -tf['TF'] = 'x' -# ajout des dates -tf['valid_from'] = '2021-01-01' -tf['valid_until'] = '2023-12-31' -# ajout du embargo et archiving -tf['embargo_months'] = 0 -tf['archiving'] = True - - -# In[25]: - - -# append -rp = rp.append(tf, ignore_index=True) -rp - - -# In[26]: - - -# ouvrir la liste des journaux CUP -cup = pd.read_excel('agreements/CUP_Journals_titlelist_publish.xlsx', skiprows=7) -cup - - -# In[27]: - - -# renommer l'ISSN -cup = cup.rename(columns = {'e-ISSN' : 'ISSN'}) -cup - - -# In[28]: - - -# ajout du champ license -# cc_by, cc_by_nc, cc_by_nc_nd, cc_by_nc_sa -cup['article_version'] = 'published' -cup['license'] = 'cc_by' -cup['CUP'] = 'x' -# ajout des dates -cup['valid_from'] = '2021-01-01' -cup['valid_until'] = '2023-12-31' -# ajout du embargo et archiving -cup['embargo_months'] = 60 -cup['archiving'] = True - - -# In[29]: - - -# append -rp = rp.append(cup, ignore_index=True) -cup['license'] = 'cc_by_nc' -rp = rp.append(cup, ignore_index=True) -cup['license'] = 'cc_by_nc_nd' -rp = rp.append(cup, ignore_index=True) -cup['license'] = 'cc_by_nc_sa' -rp = rp.append(cup, ignore_index=True) -rp - - -# In[30]: - - -# test des lignes sans embargo -rp.loc[rp['embargo_months'].isna()] - - -# In[31]: - - -# ajout des ISSN-L -issnl = pd.read_csv('issn/20171102.ISSN-to-ISSN-L.txt', encoding='utf-8', header=0, sep='\t') -issnl - - -# In[32]: - - -# renommer les colonnes -issnl = issnl.rename(columns={'ISSN' : 'issn', 'ISSN-L' : 'issnl'}) -rp = rp.rename(columns={'ISSN' : 'issn'}) - - -# In[33]: - - -# merge -rp = pd.merge(rp, issnl, on='issn', how='left') -rp - - -# In[34]: - - -# cummuler les issns pour le merge -# rp_1 = rp.loc[rp['issnl'].notna()][['issnl', 'article_version', 'license', 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP']] -# rp_1 = 
rp_1.rename(columns = {'issnl' : 'issn'}) -# rp_2 = rp.loc[rp['issn'].notna()][['issn', 'article_version', 'license', 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP']] -# rp_all = rp_1.append(rp_2, ignore_index=True) -rp_all = rp - - -# In[35]: - - -# ajouter les champs manquants -# valeur discount (id 2) à 100% pour les licences read & publish -# elsevier['amount'] = 100 -# elsevier['symbol'] = '%' -# elsevier['cost_factor_type'] = 2 -# elsevier['comment'] = 'Source: swissuniversities' -# elsevier - - -# In[36]: - - -# merge avec les organisations -# 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP' -participants_elsevier = participants.loc[participants['Elsevier'].notna()][['Elsevier', 'ROR']] -rp_elsevier = rp_all.loc[rp_all['Elsevier'].notna()] -rp_1 = pd.merge(rp_elsevier, participants_elsevier, on='Elsevier', how='outer') -rp_1 - - -# In[37]: - - -rp_elsevier - - -# In[38]: - - -participants_elsevier - - -# In[39]: - - -# merge avec les organisations -# 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP' -participants_springer = participants.loc[participants['Springer Nature'].notna()][['Springer Nature', 'ROR']] -rp_springer = rp_all.loc[rp_all['Springer Nature'].notna()] -rp_2 = pd.merge(rp_springer, participants_springer, on='Springer Nature', how='outer') -rp_2 - - -# In[40]: - - -# merge avec les organisations -# 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP' -participants_wiley = participants.loc[participants['Wiley'].notna()][['Wiley', 'ROR']] -rp_wiley = rp_all.loc[rp_all['Wiley'].notna()] -rp_3 = pd.merge(rp_wiley, participants_wiley, on='Wiley', how='outer') -rp_3 - - -# In[41]: - - -rp_wiley - - -# In[42]: - - -# merge avec les organisations -# 'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP' -participants_tf = participants.loc[participants['TF'].notna()][['TF', 'ROR']] -rp_tf = rp_all.loc[rp_all['TF'].notna()] -rp_4 = pd.merge(rp_tf, participants_tf, on='TF', how='outer') -rp_4 - - -# In[43]: - - -# merge avec les organisations -# 
'Elsevier', 'Springer Nature', 'Wiley', 'TF', 'CUP' -participants_cup = participants.loc[participants['CUP'].notna()][['CUP', 'ROR']] -rp_cup = rp_all.loc[rp_all['CUP'].notna()] -rp_5 = pd.merge(rp_cup, participants_cup, on='CUP', how='outer') -rp_5 - - -# In[44]: - - -# concat des 5 -rp_fin = rp_1.append(rp_2, ignore_index=True) -rp_fin = rp_fin.append(rp_3, ignore_index=True) -rp_fin = rp_fin.append(rp_4, ignore_index=True) -rp_fin = rp_fin.append(rp_5, ignore_index=True) -rp_fin - - -# In[45]: - - -# supprimer les doublons et les vides -rp_fin = rp_fin.dropna(subset=['issn']) -rp_fin = rp_fin.drop_duplicates(subset=['issn', 'license', 'ROR']) -rp_fin - - -# In[46]: - - -# reindex et ajout de l'id avec l'index + 1 -rp_fin = rp_fin.reset_index() -del rp_fin['index'] -rp_fin = rp_fin.reset_index() -rp_fin['rp_id'] = rp_fin.index + 1 -rp_fin - - -# In[47]: - - -rp_fin['embargo_months'].value_counts() - - -# In[48]: - - -# test des lignes sans embargo -rp_fin.loc[rp_fin['embargo_months'].isna()] - - -# In[49]: - - -issn - - -# In[50]: - - -# merge pour avoir l'issnl -issn = pd.merge(issn, issnl, on='issn', how='left') -issn - - -# In[51]: - - -issn.loc[issn['issnl'].isna()] - - -# In[52]: - - -# merge dans l'autre sens pour garder que les lignes du fichier -rp_fin = pd.merge(rp_fin, issn[['id', 'journal', 'issnl']], on='issnl', how='left') -rp_fin - - -# In[53]: - - -# test des lignes sans embargo -rp_fin.loc[rp_fin['embargo_months'].isna() & rp_fin['id'].notna()] - - -# In[54]: - - -# garder les lignes avec merge -rp_fin_merge = rp_fin.loc[rp_fin['id'].notna()] -rp_fin_merge - - -# In[55]: - - -# supprimer les doublons et les vides -rp_fin_merge = rp_fin_merge.drop_duplicates(subset=['rp_id']) -rp_fin_merge - - -# In[56]: - - -# test des lignes sans journal -rp_fin_merge.loc[rp_fin_merge['journal'].isna()] - - -# In[57]: - - -# convertir l'index en id -del rp_fin_merge['id'] -del rp_fin_merge['index'] -del rp_fin_merge['rp_id'] -rp_fin_merge = 
rp_fin_merge.reset_index() -# ajout de l'id avec l'index + 1 -rp_fin_merge['rp_id'] = rp_fin_merge['index'] + 1 -del rp_fin_merge['index'] -rp_fin_merge - - -# In[58]: - - -# convertir l'index en id -del rp_fin_merge['rp_id'] -rp_fin_merge = rp_fin_merge.reset_index() -# ajout de l'id avec l'index + 1 -rp_fin_merge['rp_id'] = rp_fin_merge['index'] + 1 -del rp_fin_merge['index'] -rp_fin_merge - - -# In[59]: - - -rp_fin_merge['embargo_months'].value_counts() - - -# In[60]: - - -# test des lignes sans embargo -rp_fin_merge.loc[rp_fin_merge['embargo_months'].isna()] - - -# In[61]: - - -# export excel -rp_fin_merge.to_excel('sample/read_publish_brut_merge.xlsx', index=False) - - -# In[62]: - - -# export csv -rp_fin_merge.to_csv('sample/read_publish_brut_merge.tsv', sep='\t', index=False) - diff --git a/import_scripts/10_oacct_terms.md b/import_scripts/10_oacct_terms.md deleted file mode 100644 index 9b95fd74..00000000 --- a/import_scripts/10_oacct_terms.md +++ /dev/null @@ -1,39541 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet de modifier les données extraites des differentes sources et les exporter dans les tables de l'application OACCT. - -Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -Date de dernière mise à jour : 08.09.2021 - - -```python -import pandas as pd -import csv -import json -import numpy as np -import os -# afficher toutes les colonnes -pd.set_option('display.max_columns', None) -# definir le debut des ids -id_start = 1 -``` - -## Import du fichier extrait de Sherpa - - -```python -sherpa = pd.read_csv('sample/sherpa_policies_brut.tsv', encoding='utf-8', header=0, sep='\t') -sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionlicenseembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesid
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN1
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN2
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN3
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN4
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...NaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN5
..........................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN8591
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN8592
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN8593
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN8594
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN8595
-

8595 rows × 29 columns

-
- - - - -```python -# test des valeurs pour les versions -sherpa['article_version'].value_counts() -``` - - - - - published 4688 - accepted 3251 - submitted 656 - Name: article_version, dtype: int64 - - - - -```python -# test des valeurs pour les issns -sherpa.loc[sherpa['issn'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionlicenseembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesid
-
- - - - -```python -# ajout des ISSN-L -issns = pd.read_csv('issn/20171102.ISSN-to-ISSN-L.txt', encoding='utf-8', header=0, sep='\t') -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ISSNISSN-L
00000-00190000-0019
10000-00270000-0027
20000-00430000-0043
30000-00510000-0051
40000-006X0000-006X
.........
19959138756-99578756-9957
19959148756-99658756-9965
19959158756-99738756-9973
19959168756-99818756-9981
19959178756-999X8756-999X
-

1995918 rows × 2 columns

-
- - - - -```python -# renommer les colonnes -issns = issns.rename(columns={'ISSN' : 'issn', 'ISSN-L' : 'issnl'}) -issns -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnl
00000-00190000-0019
10000-00270000-0027
20000-00430000-0043
30000-00510000-0051
40000-006X0000-006X
.........
19959138756-99578756-9957
19959148756-99658756-9965
19959158756-99738756-9973
19959168756-99818756-9981
19959178756-999X8756-999X
-

1995918 rows × 2 columns

-
- - - - -```python -# merge avec la table sherpa -sherpa = pd.merge(sherpa, issns, on='issn', how='left') -sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionlicenseembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnl
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN10001-2815
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN20001-2815
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN30001-2815
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN40001-2815
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...NaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN50001-4842
.............................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85912475-9953
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85922475-9953
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85932475-9953
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85942475-9953
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85952475-9953
-

8595 rows × 30 columns

-
- - - - -```python -# test des valeurs pour les issnl -sherpa.loc[sherpa['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionlicenseembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnl
-
- - - - -```python -# extraction des données IR Archiving + Embargo par ISSN -sherpa_ir = sherpa[['issnl', ]] -``` - -## Import du fichier des licences Read & Publish - - -```python -rp = pd.read_csv('sample/read_publish_brut_merge.tsv', encoding='utf-8', header=0, sep='\t') -rp -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3058: DtypeWarning: Columns (0,1,3,4) have mixed types. Specify dtype option on import or set low_memory=False. - interactivity=interactivity, compiler=compiler, result=result) - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORjournalrp_id
0NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01
1NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02
2NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03
3NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04
4NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05
.........................................................
40078xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079
40079xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080
40080xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081
40081xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082
40082xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083
-

40083 rows × 18 columns

-
- - - - -```python -rp['embargo_months'].value_counts() -``` - - - - - 0 39163 - 60 920 - Name: embargo_months, dtype: int64 - - - - -```python -# ajout de l'éditeur dans un seul champ -# rp.loc[rp['Elsevier'] == 'x', 'public_notes'] = 'Elsevier Read & Publish agreement' -rp.loc[rp['Elsevier'] == 'x', 'rp_publisher'] = 'Elsevier' -rp.loc[rp['Springer Nature'] == 'x', 'rp_publisher'] = 'Springer Nature' -rp.loc[rp['Wiley'] == 'x', 'rp_publisher'] = 'Wiley' -rp.loc[rp['TF'] == 'x', 'rp_publisher'] = 'TF' -rp.loc[rp['CUP'] == 'x', 'rp_publisher'] = 'CUP' -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CUPElsevierissnSpringer NatureTFTitleURLWileyarchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORjournalrp_idrp_publisher
0NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01Elsevier
1NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02Elsevier
2NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03Elsevier
3NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04Elsevier
4NaNx1742-7061NaNNaNActa BiomaterialiaNaNNaNTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05Elsevier
............................................................
40078xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079CUP
40079xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080CUP
40080xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081CUP
40081xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082CUP
40082xNaN1435-8115NaNNaNMicroscopy and Microanalysishttp://www.cambridge.org/core/product/identifi...NaNTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083CUP
-

40083 rows × 19 columns

-
- - - - -```python -# test des valeurs pour les versions -rp['rp_publisher'].value_counts() -``` - - - - - Elsevier 18128 - Wiley 13905 - Springer Nature 6716 - CUP 920 - TF 414 - Name: rp_publisher, dtype: int64 - - - - -```python -# test des valeurs pour les versions -rp['license'].value_counts() -``` - - - - - cc_by 17701 - cc_by_nc_nd 13929 - cc_by_nc 8223 - cc_by_nc_sa 230 - Name: license, dtype: int64 - - - - -```python -# supprimer les champs inutiles et renommer les colonnes -del rp['Elsevier'] -del rp['Springer Nature'] -del rp['Wiley'] -del rp['TF'] -del rp['CUP'] -del rp['URL'] -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnTitlearchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlRORjournalrp_idrp_publisher
01742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01Elsevier
11742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02Elsevier
21742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03Elsevier
31742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04Elsevier
41742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05Elsevier
..........................................
400781435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079CUP
400791435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080CUP
400801435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081CUP
400811435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082CUP
400821435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083CUP
-

40083 rows × 13 columns

-
- - - - -```python -# renommer les colonnes -rp = rp.rename(columns = {'Title' : 'title', 'ROR' : 'ror', 'read_publish_id' : 'rp_id'}) -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issntitlearchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlrorjournalrp_idrp_publisher
01742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01Elsevier
11742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02Elsevier
21742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03Elsevier
31742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04Elsevier
41742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05Elsevier
..........................................
400781435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079CUP
400791435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080CUP
400801435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081CUP
400811435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082CUP
400821435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083CUP
-

40083 rows × 13 columns

-
- - - -## Table applicable_version - - -```python -# creation du DF -col_names = ['id', - 'type', - 'description' - ] -applicable_version = pd.DataFrame(columns = col_names) -# 3 values : published, accepted, submitted -new_row1 = {'id':1, 'type':'submitted', 'description' : 'Submitted version'} -new_row2 = {'id':2, 'type':'accepted', 'description' : 'Accepted version'} -new_row3 = {'id':3, 'type':'published', 'description' : 'Published version'} -#append row to the dataframe -applicable_version = applicable_version.append(new_row1, ignore_index=True) -applicable_version = applicable_version.append(new_row2, ignore_index=True) -applicable_version = applicable_version.append(new_row3, ignore_index=True) -applicable_version -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idtypedescription
01submittedSubmitted version
12acceptedAccepted version
23publishedPublished version
-
- - - - -```python -# ajout de la valeur UNKNOWN -applicable_version = applicable_version.append({'id' : 999999, 'type' : 'UNKNOWN', 'description' : 'UNKNOWN'}, ignore_index=True) -applicable_version -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idtypedescription
01submittedSubmitted version
12acceptedAccepted version
23publishedPublished version
3999999UNKNOWNUNKNOWN
-
- - - - -```python -# renommage des champs finaux -applicable_version_export = applicable_version[['id', 'description']] -``` - - -```python -# export de la table applicable_version -result = applicable_version_export.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/version.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export csv -applicable_version_export.to_csv('sample/version.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -applicable_version_export.to_excel('sample/version.xlsx', index=False) -``` - - -```python -# merge avec la table sherpa -sherpa = pd.merge(sherpa, applicable_version[['id', 'type']], left_on='article_version', right_on='type', how='left') -sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionlicenseembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesid_xissnlid_ytype
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN10001-28151submitted
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN20001-28152accepted
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN30001-28153published
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN40001-28153published
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...NaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN50001-48421submitted
...................................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85912475-99531submitted
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85922475-99532accepted
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85932475-99533published
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85942475-99533published
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85952475-99533published
-

8595 rows × 32 columns

-
- - - - -```python -sherpa = sherpa.rename(columns = {'id_x' : 'id', 'id_y' : 'version'}) -del sherpa['type'] -sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionlicenseembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnlversion
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN10001-28151
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN20001-28152
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN30001-28153
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN40001-28153
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...NaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN50001-48421
................................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85912475-99531
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85922475-99532
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85932475-99533
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85942475-99533
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85952475-99533
-

8595 rows × 31 columns

-
- - - - -```python -# merge avec la table read & publish -rp = pd.merge(rp, applicable_version[['id', 'type']], left_on='article_version', right_on='type', how='left') -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issntitlearchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlrorjournalrp_idrp_publisheridtype
01742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01Elsevier3published
11742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02Elsevier3published
21742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03Elsevier3published
31742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04Elsevier3published
41742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05Elsevier3published
................................................
400781435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079CUP3published
400791435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080CUP3published
400801435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081CUP3published
400811435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082CUP3published
400821435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083CUP3published
-

40083 rows × 15 columns

-
- - - - -```python -rp = rp.rename(columns = {'id' : 'version'}) -del rp['type'] -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issntitlearchivingarticle_versionembargo_monthslicensevalid_fromvalid_untilissnlrorjournalrp_idrp_publisherversion
01742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01Elsevier3
11742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02Elsevier3
21742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03Elsevier3
31742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04Elsevier3
41742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05Elsevier3
.............................................
400781435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079CUP3
400791435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080CUP3
400801435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081CUP3
400811435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082CUP3
400821435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083CUP3
-

40083 rows × 14 columns

-
- - - -## Table oa_licence - - -```python -# creation du DF -# 'version' n'est pas utilisée, on dédoublonne par nom sans la version -col_names = ['id', - 'name', - 'url' - ] -oa_licence = pd.DataFrame(columns = col_names) -oa_licence -``` - - - - -
- - - - - - - - - - - - -
idnameurl
-
- - - - -```python -# export des licences -sherpa['license'].value_counts() -``` - - - - - cc_by 4151 - cc_by_nc_nd 2338 - cc_by_nc 559 - bespoke_license 47 - cc_by_nc_sa 20 - cc_by_nd 7 - cc_by_sa 4 - cc0 3 - all_rights_reserved 1 - Name: license, dtype: int64 - - - - -```python -sherpa_licences = sherpa['license'].drop_duplicates() -sherpa_licences = sherpa_licences.dropna() -sherpa_licences -``` - - - - - 2 cc_by - 3 cc_by_nc_nd - 8 bespoke_license - 29 cc_by_nc - 425 cc_by_nc_sa - 443 all_rights_reserved - 2147 cc_by_sa - 2148 cc_by_nd - 8420 cc0 - Name: license, dtype: object - - - - -```python -oa_licence['sherpa_code'] = np.nan -oa_licence -``` - - - - -
- - - - - - - - - - - - - -
idnameurlsherpa_code
-
- - - - -```python -for code in sherpa_licences: - print (code) - oa_licence = oa_licence.append({'sherpa_code' : code}, ignore_index=True) -``` - - cc_by - cc_by_nc_nd - bespoke_license - cc_by_nc - cc_by_nc_sa - all_rights_reserved - cc_by_sa - cc_by_nd - cc0 - - - -```python -oa_licence -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnameurlsherpa_code
0NaNNaNNaNcc_by
1NaNNaNNaNcc_by_nc_nd
2NaNNaNNaNbespoke_license
3NaNNaNNaNcc_by_nc
4NaNNaNNaNcc_by_nc_sa
5NaNNaNNaNall_rights_reserved
6NaNNaNNaNcc_by_sa
7NaNNaNNaNcc_by_nd
8NaNNaNNaNcc0
-
- - - - -```python -# convertir l'index en id -oa_licence = oa_licence.reset_index() -# ajout de l'id avec l'index + 1 -oa_licence['id'] = oa_licence['index'] + 1 -del oa_licence['index'] -oa_licence -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnameurlsherpa_code
01NaNNaNcc_by
12NaNNaNcc_by_nc_nd
23NaNNaNbespoke_license
34NaNNaNcc_by_nc
45NaNNaNcc_by_nc_sa
56NaNNaNall_rights_reserved
67NaNNaNcc_by_sa
78NaNNaNcc_by_nd
89NaNNaNcc0
-
- - - - -```python -# ajout du nom et des URLs -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by', 'name'] = 'CC BY' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by', 'url'] = 'https://creativecommons.org/licenses/by/4.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_sa', 'name'] = 'CC BY-SA' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_sa', 'url'] = 'https://creativecommons.org/licenses/by-sa/4.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nc', 'name'] = 'CC BY-NC' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nc', 'url'] = 'https://creativecommons.org/licenses/by-nc/4.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nc_sa', 'name'] = 'CC BY-NC-SA' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nc_sa', 'url'] = 'https://creativecommons.org/licenses/by-nc-sa/4.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nd', 'name'] = 'CC BY-ND' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nd', 'url'] = 'https://creativecommons.org/licenses/by-nd/4.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nc_nd', 'name'] = 'CC BY-NC-ND' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nc_nd', 'url'] = 'https://creativecommons.org/licenses/by-nc-nd/4.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc0', 'name'] = 'CC0' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc0', 'url'] = 'https://creativecommons.org/publicdomain/zero/1.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'bespoke_license', 'name'] = 'Specific license' -oa_licence.loc[oa_licence['sherpa_code'] == 'bespoke_license', 'url'] = '' -oa_licence.loc[oa_licence['sherpa_code'] == 'all_rights_reserved', 'name'] = 'All rights reserved' -oa_licence.loc[oa_licence['sherpa_code'] == 'all_rights_reserved', 'url'] = '' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_gnu_gpl', 'name'] = 'GNU GPL' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_gnu_gpl', 'url'] = 'http://gnugpl.org/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_public_domain', 'name'] = 
'Public domain' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_public_domain', 'url'] = 'https://creativecommons.org/share-your-work/public-domain/' -# oa_licence.loc[oa_licence['sherpa_code'] == 'bespoke_license', 'url'] = 'https://port.sas.ac.uk/mod/book/view.php?id=1340&chapterid=1003' -oa_licence -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnameurlsherpa_code
01CC BYhttps://creativecommons.org/licenses/by/4.0/cc_by
12CC BY-NC-NDhttps://creativecommons.org/licenses/by-nc-nd/...cc_by_nc_nd
23Specific licensebespoke_license
34CC BY-NChttps://creativecommons.org/licenses/by-nc/4.0/cc_by_nc
45CC BY-NC-SAhttps://creativecommons.org/licenses/by-nc-sa/...cc_by_nc_sa
56All rights reservedall_rights_reserved
67CC BY-SAhttps://creativecommons.org/licenses/by-sa/4.0/cc_by_sa
78CC BY-NDhttps://creativecommons.org/licenses/by-nd/4.0/cc_by_nd
89CC0https://creativecommons.org/publicdomain/zero/...cc0
-
- - - - -```python -# ajout de la valeur UNKNOWN -oa_licence = oa_licence.append({'id' : 999999, 'sherpa_code' : '___', 'name' : 'UNKNOWN', 'url' : ''}, ignore_index=True) -oa_licence -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idnameurlsherpa_code
01CC BYhttps://creativecommons.org/licenses/by/4.0/cc_by
12CC BY-NC-NDhttps://creativecommons.org/licenses/by-nc-nd/...cc_by_nc_nd
23Specific licensebespoke_license
34CC BY-NChttps://creativecommons.org/licenses/by-nc/4.0/cc_by_nc
45CC BY-NC-SAhttps://creativecommons.org/licenses/by-nc-sa/...cc_by_nc_sa
56All rights reservedall_rights_reserved
67CC BY-SAhttps://creativecommons.org/licenses/by-sa/4.0/cc_by_sa
78CC BY-NDhttps://creativecommons.org/licenses/by-nd/4.0/cc_by_nd
89CC0https://creativecommons.org/publicdomain/zero/...cc0
9999999UNKNOWN___
-
- - - - -```python -# ajout aux tables sherpa et rp -sherpa = sherpa.rename(columns = {'license' : 'sherpa_code'}) -sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionsherpa_codeembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnlversion
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN10001-28151
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN20001-28152
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN30001-28153
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN40001-28153
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...NaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN50001-48421
................................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85912475-99531
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85922475-99532
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85932475-99533
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85942475-99533
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85952475-99533
-

8595 rows × 31 columns

-
- - - - -```python -# ajout aux tables sherpa et rp -rp = rp.rename(columns = {'license' : 'sherpa_code'}) -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issntitlearchivingarticle_versionembargo_monthssherpa_codevalid_fromvalid_untilissnlrorjournalrp_idrp_publisherversion
01742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01Elsevier3
11742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02Elsevier3
21742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03Elsevier3
31742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04Elsevier3
41742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05Elsevier3
.............................................
400781435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079CUP3
400791435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080CUP3
400801435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081CUP3
400811435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082CUP3
400821435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083CUP3
-

40083 rows × 14 columns

-
- - - - -```python -# merge -sherpa = pd.merge(sherpa, oa_licence[['sherpa_code', 'id']], on='sherpa_code', how='left') -sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionsherpa_codeembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesid_xissnlversionid_y
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN10001-28151NaN
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN20001-28152NaN
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN30001-281531.0
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN40001-281532.0
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...NaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN50001-48421NaN
...................................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85912475-99531NaN
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85922475-99532NaN
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85932475-99533NaN
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85942475-995331.0
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85952475-995331.0
-

8595 rows × 32 columns

-
- - - - -```python -sherpa = sherpa.rename(columns = {'id_x' : 'id', 'id_y' : 'licence'}) -sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionsherpa_codeembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnlversionlicence
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN10001-28151NaN
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN20001-28152NaN
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN30001-281531.0
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN40001-281532.0
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...NaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN50001-48421NaN
...................................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85912475-99531NaN
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85922475-99532NaN
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85932475-99533NaN
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85942475-995331.0
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85952475-995331.0
-

8595 rows × 32 columns

-
- - - - -```python -# merge -rp = pd.merge(rp, oa_licence[['sherpa_code', 'id']], on='sherpa_code', how='left') -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issntitlearchivingarticle_versionembargo_monthssherpa_codevalid_fromvalid_untilissnlrorjournalrp_idrp_publisherversionid
01742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01Elsevier31
11742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02Elsevier31
21742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03Elsevier31
31742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04Elsevier31
41742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05Elsevier31
................................................
400781435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079CUP35
400791435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080CUP35
400801435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081CUP35
400811435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082CUP35
400821435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083CUP35
-

40083 rows × 15 columns

-
- - - - -```python -rp = rp.rename(columns = {'id' : 'licence'}) -rp -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issntitlearchivingarticle_versionembargo_monthssherpa_codevalid_fromvalid_untilissnlrorjournalrp_idrp_publisherversionlicence
01742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01Elsevier31
11742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02Elsevier31
21742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03Elsevier31
31742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04Elsevier31
41742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05Elsevier31
................................................
400781435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079CUP35
400791435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080CUP35
400801435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081CUP35
400811435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082CUP35
400821435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083CUP35
-

40083 rows × 15 columns

-
- - - - -```python -# renommage des champs finaux -oa_licence_export = oa_licence[['id', 'name', 'url']] -oa_licence_export = oa_licence_export.rename(columns={'name' : 'name_or_abbrev', 'url' : 'website'}) -``` - - -```python -# export de la table oa_licence -result = oa_licence_export.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/licence.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export csv -oa_licence_export.to_csv('sample/licence.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -oa_licence_export.to_excel('sample/licence.xlsx', index=False) -``` - -## Table cost_factor_type - - -```python -# creation du DF -col_names = ['id', - 'name' - ] -cost_factor_type = pd.DataFrame(columns = col_names) -cost_factor_type = cost_factor_type.append({'id' : 1, 'name' : 'APC'}, ignore_index=True) -cost_factor_type = cost_factor_type.append({'id' : 2, 'name' : 'Discount'}, ignore_index=True) -cost_factor_type = cost_factor_type.append({'id' : 3, 'name' : 'Refund'}, ignore_index=True) -cost_factor_type -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - -
idname
01APC
12Discount
23Refund
-
- - - - -```python -# ajout de la valeur UNKNOWN -cost_factor_type = cost_factor_type.append({'id' : 999999, 'name' : 'UNKNOWN'}, ignore_index=True) -cost_factor_type -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idname
01APC
12Discount
23Refund
3999999UNKNOWN
-
- - - - -```python -# export de la table -result = cost_factor_type.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/cost_factor_type.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export csv -cost_factor_type.to_csv('sample/cost_factor_type.tsv', sep='\t', encoding='utf-8', index=False) -``` - - -```python -# export excel -cost_factor_type.to_excel('sample/cost_factor_type.xlsx', index=False) -``` - -## Table cost_factor - -### Ajout des données des APCs depuis DOAJ - - -```python -# ajout de DOAJ info -doaj = pd.read_csv('doaj/journalcsv__doaj_20210312_0636_utf8.csv', encoding='utf-8', header=0) -doaj -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal titleJournal URLURL in DOAJAlternative titleJournal ISSN (print version)Journal EISSN (online version)KeywordsLanguages in which the journal accepts manuscriptsPublisherCountry of publisherSociety or institutionCountry of society or institutionJournal licenseLicense attributesURL for license termsMachine-readable CC licensing information embedded or displayed in articlesURL to an example page with embedded licensing informationAuthor holds copyright without restrictionsCopyright information URLReview processReview process information URLJournal plagiarism screening policyPlagiarism information URLURL for journal's aims & scopeURL for the Editorial Board pageURL for journal's instructions for authorsAverage number of weeks between article submission and publicationAPCAPC information URLAPC amountJournal waiver policy (for developing country authors etc)Waiver policy information URLHas other feesOther submission fees information URLPreservation ServicesPreservation Service: national libraryPreservation information URLDeposit policy directoryURL for deposit policyPersistent article identifiersArticle metadata includes ORCIDsJournal complies with I4OC standards for open citationsDoes this journal allow unrestricted reuse in compliance with BOAI?URL for journal's Open Access statementContinuesContinued ByLCC CodesSubjectsDOAJ SealAdded on DateLast updated DateNumber of Article RecordsMost Recent Article Added
0Anais da Academia Brasileira de Ciênciashttp://www.scielo.br/scielo.php?script=sci_ser...https://doaj.org/toc/ed09859a464f4461b1af34279...Annals of the Brazilian Academy of Sciences0001-37651678-2690biological sciences, exact and earth sciences,...EnglishAcademia Brasileira de CiênciasBrazilNaNNaNCC BYNaNhttp://www.scielo.br/revistas/aabc/iaboutj.htmYeshttp://www.scielo.br/scielo.php?script=sci_art...NoNaNPeer reviewhttp://www.scielo.br/revistas/aabc/iinstruc.htmYeshttp://www.scielo.br/revistas/aabc/iinstruc.htmhttp://www.scielo.br/revistas/aabc/iaboutj.htmhttp://www.scielo.br/revistas/aabc/iedboard.htmhttp://www.scielo.br/revistas/aabc/iinstruc.htm18Nohttp://www.scielo.br/revistas/aabc/iinstruc.htmNaNNoNaNNohttp://www.scielo.br/revistas/aabc/iinstruc.htmNaNNaNNaNNaNNaNDOINaNNaNYeshttp://www.scielo.br/revistas/aabc/isubscrp.htmNaNNaNQScienceNo2004-04-23T21:31:00Z2017-01-04T14:19:54Z26492020-06-10T21:49:11Z
1ACMEhttp://riviste.unimi.it/index.php/ACMEhttps://doaj.org/toc/b1ca04ba56194f29a362b3eef...NaN0001-494X2282-0035italian literature, classic literature, lingui...ItalianUniversità degli Studi di MilanoItalyNaNNaNCC BY-NC-NDNaNhttp://riviste.unimi.it/index.php/ACME/indexYeshttp://riviste.unimi.it/index.php/ACME/article...Yeshttp://riviste.unimi.it/index.php/ACME/about/e...Blind peer reviewhttps://riviste.unimi.it/index.php/ACME/aboutNoNaNhttps://riviste.unimi.it/index.php/ACME/abouthttps://riviste.unimi.it/index.php/ACME/about/...http://riviste.unimi.it/index.php/ACME/about/s...12Nohttps://riviste.unimi.it/index.php/Lebenswelt/...NaNNoNaNNohttps://riviste.unimi.it/index.php/Lebenswelt/...NaNItalian National Library (BNCF)http://www.depositolegale.it/NaNNaNDOI, NBNNaNNaNYeshttp://riviste.unimi.it/index.php/ACME/about/e...NaNNaNAGeneral WorksNo2014-12-22T19:55:58Z2020-02-24T09:07:42Z1662020-06-19T09:42:34Z
2Acta Dermato-Venereologicahttp://www.medicaljournals.se/actahttps://doaj.org/toc/ffde9666ab1d46f1a8c688ce6...NaN0001-55551651-2057sexually transmitted infections, psoriasis, ps...EnglishSociety for Publication of Acta Dermato-Venere...SwedenNaNNaNCC BY-NCNaNhttps://www.medicaljournals.se/acta/open-acces...NaNNaNNoNaNPeer reviewhttps://www.medicaljournals.se/acta/instructio...NoNaNhttp://www.medicaljournals.se/actahttps://www.medicaljournals.se/acta/editorshttps://www.medicaljournals.se/acta/instructio...20Yeshttps://www.medicaljournals.se/acta/instructio...1600 EURNoNaNYeshttps://www.medicaljournals.se/acta/instructio...NaNNaNhttp://www.ingentaconnect.com/publisher/claimi...Sherpa/RomeoNaNDOINaNNaNYeshttps://www.medicaljournals.se/acta/open-acces...NaNNaNRL1-803Medicine: DermatologyNo2011-11-10T12:31:05Z2017-02-22T11:14:48Z10962021-03-11T13:41:33Z
3Acta Médica Costarricensehttp://actamedica.medicos.cr/index.php/Acta_Me...https://doaj.org/toc/a5919aee5ad2413a89cf32df0...NaN0001-60122215-5856medicine, public health, medical sciences, healthEnglish, SpanishColegio de Médicos y Cirujanos de Costa RicaCosta RicaNaNNaNCC BY-NC-SANaNhttp://actamedica.medicos.cr/index.php/Acta_Me...NaNNaNNohttp://actamedica.medicos.cr/index.php/Acta_Me...Double blind peer reviewhttp://actamedica.medicos.cr/index.php/Acta_Me...Yeshttp://actamedica.medicos.cr/index.php/Acta_Me...http://actamedica.medicos.cr/index.php/Acta_Me...http://actamedica.medicos.cr/index.php/Acta_Me...http://actamedica.medicos.cr/index.php/Acta_Me...12Nohttp://actamedica.medicos.cr/index.php/Acta_Me...NaNNoNaNNoNaNPKP PNNaNhttp://actamedica.medicos.cr/index.php/Acta_Me...Sherpa/Romeohttp://actamedica.medicos.cr/index.php/Acta_Me...NaNNoNoYeshttp://actamedica.medicos.cr/index.php/Acta_Me...NaNNaNRMedicineNo2020-12-22T11:08:24Z2020-12-22T11:08:24Z12072015-12-08T15:06:43Z
4Acta Mycologicahttps://pbsociety.org.pl/journals/index.php/am...https://doaj.org/toc/0e8e2531ae3f455ebb49acb08...NaN0001-625X2353-074Xmycology, micromycetes, marcomycetes, slime mo...EnglishPolish Botanical SocietyPolandNaNNaNCC BYNaNhttps://pbsociety.org.pl/journals/index.php/am...Yeshttps://doi.org/10.5586/am.5511Yeshttps://pbsociety.org.pl/journals/index.php/am...Double blind peer reviewhttps://pbsociety.org.pl/journals/index.php/am...Yeshttps://pbsociety.org.pl/journals/index.php/am...https://pbsociety.org.pl/journals/index.php/am...https://pbsociety.org.pl/journals/index.php/am...https://pbsociety.org.pl/journals/index.php/am...16Yeshttps://pbsociety.org.pl/journals/index.php/am...400 EURNoNaNNoNaNNaNNaNNaNSherpa/Romeohttps://v2.sherpa.ac.uk/id/publication/25478DOIYesYesYeshttps://pbsociety.org.pl/journals/index.php/am...NaNNaNQH301-705.5Science: Biology (General)No2014-05-29T20:02:32Z2021-01-16T17:41:32Z11542021-03-05T18:55:46Z
..................................................................................................................................................................
16024BME Frontiershttps://spj.sciencemag.org/bmefhttps://doaj.org/toc/f9fa881c1be5443a86ed71c2e...Biomedical Engineering FrontiersNaN2765-8031biomedical imaging, biomedical devices, biomat...EnglishAmerican Association for the Advancement of Sc...United StatesSuzhou Institute of Biomedical Engineering and...ChinaCC BYNaNhttps://spj.sciencemag.org/bmef/guidelines/#co...Yeshttps://spj.sciencemag.org/journals/bmef/2020/...Nohttps://spj.sciencemag.org/bmef/guidelines/#co...Blind peer reviewhttps://spj.sciencemag.org/bmef/peer-review-pr...Yeshttps://spj.sciencemag.org/bmef/publication-et...https://spj.sciencemag.org/bmef/about/#mission...https://spj.sciencemag.org/bmef/editors/https://spj.sciencemag.org/bmef/guidelines/16Nohttps://spj.sciencemag.org/bmef/apc/NaNYeshttps://spj.sciencemag.org/bmef/apc/NoNaNNaNNaNNaNNaNNaNDOIYesYesYeshttps://spj.sciencemag.org/bmef/about/NaNNaNR855-855.5|TP248.13-248.65Medicine: Medicine (General): Medical technolo...No2021-01-22T11:54:20Z2021-01-22T11:54:20Z112021-03-08T09:06:36Z
16025Harvard Kennedy School Misinformation Reviewhttps://misinforeview.hks.harvard.eduhttps://doaj.org/toc/d71096ec7090499681cc0ccf8...HKS Misinformation ReviewNaN2766-1652misinformation, disinformation, fake newsEnglishHarvard Kennedy SchoolUnited StatesNaNNaNCC BYNaNhttps://misinforeview.hks.harvard.edu/editoria...Yeshttps://misinforeview.hks.harvard.edu/article/...Yeshttps://misinforeview.hks.harvard.edu/editoria...Double blind peer reviewhttps://misinforeview.hks.harvard.edu/editoria...NoNaNhttps://misinforeview.hks.harvard.edu/our-miss...https://misinforeview.hks.harvard.edu/editoria...https://misinforeview.hks.harvard.edu/submit/10Nohttps://misinforeview.hks.harvard.edu/editoria...NaNNoNaNNoNaNNaNNaNNaNNaNNaNDOIYesNoYeshttps://misinforeview.hks.harvard.edu/editoria...NaNNaNT58.5-58.64|P87-96Technology: Technology (General): Industrial e...No2021-02-12T10:29:21Z2021-02-12T10:29:21Z0NaN
16026One Health & Risk Managementhttps://journal.ohrm.bba.md/index.php/journal-...https://doaj.org/toc/68671b966cd24a0ebaa44d78f...OH&RM2887-34582587-3466one health, risc management, public health, hu...English, Romanian, French, RussianAsociatia de Biosiguranta si BiosecuritateMoldova, Republic ofNaNNaNCC BYNaNhttps://journal.ohrm.bba.md/index.php/journal-...Yeshttps://journal.ohrm.bba.md/index.php/journal-...Yeshttps://journal.ohrm.bba.md/index.php/journal-...Double blind peer reviewhttps://journal.ohrm.bba.md/index.php/journal-...NoNaNhttps://journal.ohrm.bba.md/index.php/journal-...https://journal.ohrm.bba.md/index.php/journal-...https://journal.ohrm.bba.md/index.php/journal-...10Nohttps://journal.ohrm.bba.md/index.php/journal-...NaNNoNaNNoNaNNaNNaNNaNNaNNaNDOI, UDCYesNoYeshttps://journal.ohrm.bba.md/index.php/journal-...NaNNaNR|QMedicine | ScienceNo2021-03-04T16:06:58Z2021-03-04T16:06:58Z42021-03-04T20:46:57Z
16027فصلنامه پژوهش‌های مدیریت منابع انسانیhttps://hrmj.ihu.ac.ir/?lang=enhttps://doaj.org/toc/87d44ffb6ff849b18d5ddce9c...Journal of Research in Human Resources Management8254-80022645-5072human resources managementPersianImam Hussein UniversityIran, Islamic Republic ofNaNNaNCC BYNaNhttps://hrmj.ihu.ac.ir/journal/about?lang=enNaNNaNYeshttps://hrmj.ihu.ac.ir/journal/about?lang=enDouble blind peer reviewhttps://hrmj.ihu.ac.ir/journal/process?lang=enNoNaNhttps://hrmj.ihu.ac.ir/journal/aim_scope?lang=enhttps://hrmj.ihu.ac.ir/journal/editorial.board...https://hrmj.ihu.ac.ir/journal/authors.note?la...20Nohttps://hrmj.ihu.ac.ir/?lang=enNaNNoNaNNoNaNNaNNaNNaNNaNNaNNaNNoNoYeshttps://hrmj.ihu.ac.ir/?lang=enNaNNaNHF5549-5549.5Social Sciences: Commerce: Business: Personnel...No2021-01-20T11:27:05Z2021-01-20T11:27:05Z0NaN
16028Science of Tsunami Hazardshttp://tsunamisociety.org/https://doaj.org/toc/a4f06be11f4f4db489dc034c7...NaN8755-6839NaNtsunamis, tsunami warning systems, earthquakes...EnglishTsunami Society InternationalUnited StatesTsunami Society InternationalNaNCC BYNaNhttp://tsunamisociety.org/InstructionsAuthors....NaNNaNNoNaNPeer reviewhttp://tsunamisociety.org/PeerReview.htmlNoNaNhttp://tsunamisociety.org/AboutUs.htmlhttp://tsunamisociety.org/EditorialBoard.htmlhttp://tsunamisociety.org/InstructionsAuthors....12Nohttp://tsunamisociety.org/InstructionsAuthors....NaNNoNaNYeshttp://tsunamisociety.org/InstructionsAuthors....NaNNaNNaNNaNNaNNaNNaNNaNYeshttp://tsunamisociety.org/AboutUs.htmlNaNNaNGC1-1581Geography. Anthropology. Recreation: OceanographyNo2009-04-16T17:40:30Z2016-07-21T16:09:38Z2392021-02-27T01:00:51Z
-

16029 rows × 53 columns

-
- - - - -```python -# garder les lignes avec APC -doaj_apc = doaj.loc[doaj['APC'] == 'Yes'][['Journal ISSN (print version)', 'Journal EISSN (online version)', 'APC amount']] -doaj_apc -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal ISSN (print version)Journal EISSN (online version)APC amount
20001-55551651-20571600 EUR
40001-625X2353-074X400 EUR
50001-69181873-62971500 USD
60001-69772083-9480520 EUR
110003-10622327-97883500 USD
............
16002NaN2722-1253200 USD
16004NaN2722-723535 USD
160052722-96882722-9696500000 IDR
16007NaN2723-1097100000 IDR
160222765-01892765-0235700 USD
-

4462 rows × 3 columns

-
- - - - -```python -# garder les lignes avec APC no -doaj_apc_no = doaj.loc[doaj['APC'] == 'No'][['Journal ISSN (print version)', 'Journal EISSN (online version)']] -doaj_apc_no -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal ISSN (print version)Journal EISSN (online version)
00001-37651678-2690
10001-494X2282-0035
30001-60122215-5856
70001-70191846-0410
80002-03971868-6869
.........
16024NaN2765-8031
16025NaN2766-1652
160262887-34582587-3466
160278254-80022645-5072
160288755-6839NaN
-

11567 rows × 2 columns

-
- - - - -```python -# attribuer la valeur 0 -doaj_apc_no['APC amount'] = 0 -doaj_apc_no -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal ISSN (print version)Journal EISSN (online version)APC amount
00001-37651678-26900
10001-494X2282-00350
30001-60122215-58560
70001-70191846-04100
80002-03971868-68690
............
16024NaN2765-80310
16025NaN2766-16520
160262887-34582587-34660
160278254-80022645-50720
160288755-6839NaN0
-

11567 rows × 3 columns

-
- - - - -```python -# ajout à la table des APC -doaj_apc = doaj_apc.append(doaj_apc_no, ignore_index=True) -doaj_apc -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal ISSN (print version)Journal EISSN (online version)APC amount
00001-55551651-20571600 EUR
10001-625X2353-074X400 EUR
20001-69181873-62971500 USD
30001-69772083-9480520 EUR
40003-10622327-97883500 USD
............
16024NaN2765-80310
16025NaN2766-16520
160262887-34582587-34660
160278254-80022645-50720
160288755-6839NaN0
-

16029 rows × 3 columns

-
- - - - -```python -# découpage du prix en 'amount' et 'symbol' -doaj_apc[['amount', 'symbol']] = doaj_apc['APC amount'].str.split(' ', n=1, expand=True) -doaj_apc -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal ISSN (print version)Journal EISSN (online version)APC amountamountsymbol
00001-55551651-20571600 EUR1600EUR
10001-625X2353-074X400 EUR400EUR
20001-69181873-62971500 USD1500USD
30001-69772083-9480520 EUR520EUR
40003-10622327-97883500 USD3500USD
..................
16024NaN2765-80310NaNNaN
16025NaN2766-16520NaNNaN
160262887-34582587-34660NaNNaN
160278254-80022645-50720NaNNaN
160288755-6839NaN0NaNNaN
-

16029 rows × 5 columns

-
- - - - -```python -doaj_apc.loc[doaj_apc['APC amount'] == 0, 'amount'] = 0 -doaj_apc.loc[doaj_apc['APC amount'] == 0, 'symbol'] = '' -doaj_apc -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal ISSN (print version)Journal EISSN (online version)APC amountamountsymbol
00001-55551651-20571600 EUR1600EUR
10001-625X2353-074X400 EUR400EUR
20001-69181873-62971500 USD1500USD
30001-69772083-9480520 EUR520EUR
40003-10622327-97883500 USD3500USD
..................
16024NaN2765-803100
16025NaN2766-165200
160262887-34582587-346600
160278254-80022645-507200
160288755-6839NaN00
-

16029 rows × 5 columns

-
- - - - -```python -# ajouter les champs manquants -doaj_apc['cost_factor_type'] = 1 -doaj_apc['comment'] = 'Source: DOAJ' -doaj_apc -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Journal ISSN (print version)Journal EISSN (online version)APC amountamountsymbolcost_factor_typecomment
00001-55551651-20571600 EUR1600EUR1Source: DOAJ
10001-625X2353-074X400 EUR400EUR1Source: DOAJ
20001-69181873-62971500 USD1500USD1Source: DOAJ
30001-69772083-9480520 EUR520EUR1Source: DOAJ
40003-10622327-97883500 USD3500USD1Source: DOAJ
........................
16024NaN2765-8031001Source: DOAJ
16025NaN2766-1652001Source: DOAJ
160262887-34582587-3466001Source: DOAJ
160278254-80022645-5072001Source: DOAJ
160288755-6839NaN001Source: DOAJ
-

16029 rows × 7 columns

-
- - - - -```python -# renommer les champs -doaj_apc = doaj_apc.rename(columns = {'Journal ISSN (print version)' : 'issn_print', 'Journal EISSN (online version)' : 'issn_electronic'}) -doaj_apc -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issn_printissn_electronicAPC amountamountsymbolcost_factor_typecomment
00001-55551651-20571600 EUR1600EUR1Source: DOAJ
10001-625X2353-074X400 EUR400EUR1Source: DOAJ
20001-69181873-62971500 USD1500USD1Source: DOAJ
30001-69772083-9480520 EUR520EUR1Source: DOAJ
40003-10622327-97883500 USD3500USD1Source: DOAJ
........................
16024NaN2765-8031001Source: DOAJ
16025NaN2766-1652001Source: DOAJ
160262887-34582587-3466001Source: DOAJ
160278254-80022645-5072001Source: DOAJ
160288755-6839NaN001Source: DOAJ
-

16029 rows × 7 columns

-
- - - - -```python -# ajout du issn -doaj_apc['issn'] = doaj_apc['issn_electronic'] -doaj_apc -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issn_printissn_electronicAPC amountamountsymbolcost_factor_typecommentissn
00001-55551651-20571600 EUR1600EUR1Source: DOAJ1651-2057
10001-625X2353-074X400 EUR400EUR1Source: DOAJ2353-074X
20001-69181873-62971500 USD1500USD1Source: DOAJ1873-6297
30001-69772083-9480520 EUR520EUR1Source: DOAJ2083-9480
40003-10622327-97883500 USD3500USD1Source: DOAJ2327-9788
...........................
16024NaN2765-8031001Source: DOAJ2765-8031
16025NaN2766-1652001Source: DOAJ2766-1652
160262887-34582587-3466001Source: DOAJ2587-3466
160278254-80022645-5072001Source: DOAJ2645-5072
160288755-6839NaN001Source: DOAJNaN
-

16029 rows × 8 columns

-
- - - - -```python -doaj_apc.loc[doaj_apc['issn'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issn_printissn_electronicAPC amountamountsymbolcost_factor_typecommentissn
120013-9998NaN350 EUR350EUR1Source: DOAJNaN
140015-4040NaN747 USD747USD1Source: DOAJNaN
170017-0011NaN400 EUR400EUR1Source: DOAJNaN
290026-1165NaN220000 JPY220000JPY1Source: DOAJNaN
300026-279XNaN350 USD350USD1Source: DOAJNaN
...........................
158672676-5357NaN001Source: DOAJNaN
158922686-9594NaN001Source: DOAJNaN
159372701-1569NaN001Source: DOAJNaN
159742709-8370NaN001Source: DOAJNaN
160288755-6839NaN001Source: DOAJNaN
-

1461 rows × 8 columns

-
- - - - -```python -# ajout du issnp quand c'est vide -doaj_apc.loc[doaj_apc['issn'].isna(), 'issn'] = doaj_apc['issn_print'] -doaj_apc.loc[doaj_apc['issn'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - -
issn_printissn_electronicAPC amountamountsymbolcost_factor_typecommentissn
-
- - - - -```python -doaj_apc = pd.merge(doaj_apc, issns, on='issn', how='left') -doaj_apc -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issn_printissn_electronicAPC amountamountsymbolcost_factor_typecommentissnissnl
00001-55551651-20571600 EUR1600EUR1Source: DOAJ1651-20570001-5555
10001-625X2353-074X400 EUR400EUR1Source: DOAJ2353-074X0001-625X
20001-69181873-62971500 USD1500USD1Source: DOAJ1873-62970001-6918
30001-69772083-9480520 EUR520EUR1Source: DOAJ2083-94800001-6977
40003-10622327-97883500 USD3500USD1Source: DOAJ2327-97880003-1062
..............................
16024NaN2765-8031001Source: DOAJ2765-8031NaN
16025NaN2766-1652001Source: DOAJ2766-1652NaN
160262887-34582587-3466001Source: DOAJ2587-3466NaN
160278254-80022645-5072001Source: DOAJ2645-5072NaN
160288755-6839NaN001Source: DOAJ8755-68398755-6839
-

16029 rows × 9 columns

-
- - - - -```python -# renommer les colonnes -doaj_apc = doaj_apc.rename(columns={'issnl' : 'issn_link'}) -doaj_apc -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issn_printissn_electronicAPC amountamountsymbolcost_factor_typecommentissnissn_link
00001-55551651-20571600 EUR1600EUR1Source: DOAJ1651-20570001-5555
10001-625X2353-074X400 EUR400EUR1Source: DOAJ2353-074X0001-625X
20001-69181873-62971500 USD1500USD1Source: DOAJ1873-62970001-6918
30001-69772083-9480520 EUR520EUR1Source: DOAJ2083-94800001-6977
40003-10622327-97883500 USD3500USD1Source: DOAJ2327-97880003-1062
..............................
16024NaN2765-8031001Source: DOAJ2765-8031NaN
16025NaN2766-1652001Source: DOAJ2766-1652NaN
160262887-34582587-3466001Source: DOAJ2587-3466NaN
160278254-80022645-5072001Source: DOAJ2645-5072NaN
160288755-6839NaN001Source: DOAJ8755-68398755-6839
-

16029 rows × 9 columns

-
- - - -### Ajout des APCs depuis la base Journal Database (Zurich Open Repository and Archive) - -https://www.jdb.uzh.ch/ - - -```python -# JDB base de Zurich -jdb = pd.read_csv('zora/jdb_apcs.tsv', encoding='utf-8', header=0, sep='\t') -jdb -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idissn_printissn_electronicissn_linkapc_feeapc_currencyapc_date
0100011662-51611662-51611662-51612490USD2018
1100011662-51611662-51611662-51612950USD2020
2100020952-33831467-85780952-33832500EUR2017
3100051179-72581179-72581179-72581958USD2018
4100051179-72581179-72581179-72581958USD2020
........................
1157599861549-96341549-96421549-96343000USD2015
1157699861549-96341549-96421549-96343550USD2016
1157799861549-96341549-96421549-96343550USD2017
1157899861549-96341549-96421549-96343750USD2018
1157999950816-46491465-33030816-46492950USD2017
-

11580 rows × 7 columns

-
- - - - -```python -# renommer l'id -jdb = jdb.rename(columns = {'id' : 'jdb_id'}) -jdb -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
jdb_idissn_printissn_electronicissn_linkapc_feeapc_currencyapc_date
0100011662-51611662-51611662-51612490USD2018
1100011662-51611662-51611662-51612950USD2020
2100020952-33831467-85780952-33832500EUR2017
3100051179-72581179-72581179-72581958USD2018
4100051179-72581179-72581179-72581958USD2020
........................
1157599861549-96341549-96421549-96343000USD2015
1157699861549-96341549-96421549-96343550USD2016
1157799861549-96341549-96421549-96343550USD2017
1157899861549-96341549-96421549-96343750USD2018
1157999950816-46491465-33030816-46492950USD2017
-

11580 rows × 7 columns

-
- - - - -```python -# ajouter les champs manquants -jdb['cost_factor_type'] = 1 -jdb['comment'] = 'Source: JDB (' + jdb['apc_date'].astype(str) + ')' -jdb -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
jdb_idissn_printissn_electronicissn_linkapc_feeapc_currencyapc_datecost_factor_typecomment
0100011662-51611662-51611662-51612490USD20181Source: JDB (2018)
1100011662-51611662-51611662-51612950USD20201Source: JDB (2020)
2100020952-33831467-85780952-33832500EUR20171Source: JDB (2017)
3100051179-72581179-72581179-72581958USD20181Source: JDB (2018)
4100051179-72581179-72581179-72581958USD20201Source: JDB (2020)
..............................
1157599861549-96341549-96421549-96343000USD20151Source: JDB (2015)
1157699861549-96341549-96421549-96343550USD20161Source: JDB (2016)
1157799861549-96341549-96421549-96343550USD20171Source: JDB (2017)
1157899861549-96341549-96421549-96343750USD20181Source: JDB (2018)
1157999950816-46491465-33030816-46492950USD20171Source: JDB (2017)
-

11580 rows × 9 columns

-
- - - - -```python -# renommer les champs -jdb = jdb.rename(columns = {'apc_fee' : 'amount', 'apc_currency' : 'symbol'}) -jdb -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
jdb_idissn_printissn_electronicissn_linkamountsymbolapc_datecost_factor_typecomment
0100011662-51611662-51611662-51612490USD20181Source: JDB (2018)
1100011662-51611662-51611662-51612950USD20201Source: JDB (2020)
2100020952-33831467-85780952-33832500EUR20171Source: JDB (2017)
3100051179-72581179-72581179-72581958USD20181Source: JDB (2018)
4100051179-72581179-72581179-72581958USD20201Source: JDB (2020)
..............................
1157599861549-96341549-96421549-96343000USD20151Source: JDB (2015)
1157699861549-96341549-96421549-96343550USD20161Source: JDB (2016)
1157799861549-96341549-96421549-96343550USD20171Source: JDB (2017)
1157899861549-96341549-96421549-96343750USD20181Source: JDB (2018)
1157999950816-46491465-33030816-46492950USD20171Source: JDB (2017)
-

11580 rows × 9 columns

-
- - - - -```python -jdb = jdb.drop_duplicates(subset='jdb_id', keep='last') -``` - - -```python -# import openapc avec les valeurs max -openapc = pd.read_csv('openapc/open_apc_max.tsv', encoding='utf-8', header=0, sep='\t') -openapc -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
periodeuroissnissn_printissn_electronicissn_l
020181385.360001-07820001-0782NaN0001-0782
120181811.880001-14520001-14521533-385X0001-1452
220201826.490001-14520001-14521533-385X0001-1452
320132238.760001-1541NaNNaN0001-1541
420141887.860001-1541NaNNaN0001-1541
.....................
2379320132400.008756-7938NaNNaN1520-6033
2379420141822.498756-7938NaNNaN1520-6033
2379520161762.698756-7938NaNNaN1520-6033
2379620173248.318756-7938NaNNaN1520-6033
2379720192913.118756-7938NaNNaN1520-6033
-

23798 rows × 6 columns

-
- - - - -```python -# renommer les champs -openapc = openapc.rename(columns = {'period' : 'apc_date', 'issn_l' : 'issn_link', 'euro' : 'amount'}) -openapc -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
apc_dateamountissnissn_printissn_electronicissn_link
020181385.360001-07820001-0782NaN0001-0782
120181811.880001-14520001-14521533-385X0001-1452
220201826.490001-14520001-14521533-385X0001-1452
320132238.760001-1541NaNNaN0001-1541
420141887.860001-1541NaNNaN0001-1541
.....................
2379320132400.008756-7938NaNNaN1520-6033
2379420141822.498756-7938NaNNaN1520-6033
2379520161762.698756-7938NaNNaN1520-6033
2379620173248.318756-7938NaNNaN1520-6033
2379720192913.118756-7938NaNNaN1520-6033
-

23798 rows × 6 columns

-
- - - - -```python -# ajouter le lien avec le type et le symbole -openapc['cost_factor_type'] = 1 -openapc['jdb_id'] = np.nan -openapc['symbol'] = 'EUR' -openapc['comment'] = 'Source: OpenAPC (' + openapc['apc_date'].astype(str) + ')' -openapc -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
apc_dateamountissnissn_printissn_electronicissn_linkcost_factor_typejdb_idsymbolcomment
020181385.360001-07820001-0782NaN0001-07821NaNEURSource: OpenAPC (2018)
120181811.880001-14520001-14521533-385X0001-14521NaNEURSource: OpenAPC (2018)
220201826.490001-14520001-14521533-385X0001-14521NaNEURSource: OpenAPC (2020)
320132238.760001-1541NaNNaN0001-15411NaNEURSource: OpenAPC (2013)
420141887.860001-1541NaNNaN0001-15411NaNEURSource: OpenAPC (2014)
.................................
2379320132400.008756-7938NaNNaN1520-60331NaNEURSource: OpenAPC (2013)
2379420141822.498756-7938NaNNaN1520-60331NaNEURSource: OpenAPC (2014)
2379520161762.698756-7938NaNNaN1520-60331NaNEURSource: OpenAPC (2016)
2379620173248.318756-7938NaNNaN1520-60331NaNEURSource: OpenAPC (2017)
2379720192913.118756-7938NaNNaN1520-60331NaNEURSource: OpenAPC (2019)
-

23798 rows × 10 columns

-
- - - - -```python -# ajout des lignes de openapc -jdb = jdb.append(openapc, ignore_index=True) -jdb -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py:7123: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version - of pandas will change to not sort by default. - - To accept the future behavior, pass 'sort=False'. - - To retain the current behavior and silence the warning, pass 'sort=True'. - - sort=sort, - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
amountapc_datecommentcost_factor_typeissnissn_electronicissn_linkissn_printjdb_idsymbol
02950.002020Source: JDB (2020)1NaN1662-51611662-51611662-516110001.0USD
12500.002017Source: JDB (2017)1NaN1467-85780952-33830952-338310002.0EUR
21958.002020Source: JDB (2020)1NaN1179-72581179-72581179-725810005.0USD
31370.002020Source: JDB (2020)1NaN1479-58761479-5876NaN10015.0GBP
42200.002017Source: JDB (2017)1NaN1572-85521383-49241383-492410023.0EUR
.................................
299472400.002013Source: OpenAPC (2013)18756-7938NaN1520-6033NaNNaNEUR
299481822.492014Source: OpenAPC (2014)18756-7938NaN1520-6033NaNNaNEUR
299491762.692016Source: OpenAPC (2016)18756-7938NaN1520-6033NaNNaNEUR
299503248.312017Source: OpenAPC (2017)18756-7938NaN1520-6033NaNNaNEUR
299512913.112019Source: OpenAPC (2019)18756-7938NaN1520-6033NaNNaNEUR
-

29952 rows × 10 columns

-
- - - - -```python -# supprimer les doublons par issnl et date -jdb = jdb.drop_duplicates(subset=['issn_link', 'apc_date'], keep='first') -jdb -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
amountapc_datecommentcost_factor_typeissnissn_electronicissn_linkissn_printjdb_idsymbol
02950.002020Source: JDB (2020)1NaN1662-51611662-51611662-516110001.0USD
12500.002017Source: JDB (2017)1NaN1467-85780952-33830952-338310002.0EUR
21958.002020Source: JDB (2020)1NaN1179-72581179-72581179-725810005.0USD
31370.002020Source: JDB (2020)1NaN1479-58761479-5876NaN10015.0GBP
42200.002017Source: JDB (2017)1NaN1572-85521383-49241383-492410023.0EUR
.................................
299472400.002013Source: OpenAPC (2013)18756-7938NaN1520-6033NaNNaNEUR
299481822.492014Source: OpenAPC (2014)18756-7938NaN1520-6033NaNNaNEUR
299491762.692016Source: OpenAPC (2016)18756-7938NaN1520-6033NaNNaNEUR
299503248.312017Source: OpenAPC (2017)18756-7938NaN1520-6033NaNNaNEUR
299512913.112019Source: OpenAPC (2019)18756-7938NaN1520-6033NaNNaNEUR
-

29478 rows × 10 columns

-
- - - - -```python -# ajout de DOAJ -cost_factor = doaj_apc.append(jdb, ignore_index=True) -cost_factor -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
APC amountamountapc_datecommentcost_factor_typeissnissn_electronicissn_linkissn_printjdb_idsymbol
01600 EUR1600NaNSource: DOAJ11651-20571651-20570001-55550001-5555NaNEUR
1400 EUR400NaNSource: DOAJ12353-074X2353-074X0001-625X0001-625XNaNEUR
21500 USD1500NaNSource: DOAJ11873-62971873-62970001-69180001-6918NaNUSD
3520 EUR520NaNSource: DOAJ12083-94802083-94800001-69770001-6977NaNEUR
43500 USD3500NaNSource: DOAJ12327-97882327-97880003-10620003-1062NaNUSD
....................................
45502NaN24002013Source: OpenAPC (2013)18756-7938NaN1520-6033NaNNaNEUR
45503NaN1822.492014Source: OpenAPC (2014)18756-7938NaN1520-6033NaNNaNEUR
45504NaN1762.692016Source: OpenAPC (2016)18756-7938NaN1520-6033NaNNaNEUR
45505NaN3248.312017Source: OpenAPC (2017)18756-7938NaN1520-6033NaNNaNEUR
45506NaN2913.112019Source: OpenAPC (2019)18756-7938NaN1520-6033NaNNaNEUR
-

45507 rows × 11 columns

-
- - - - -```python -# test issnl -cost_factor.loc[cost_factor['issn_link'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
APC amountamountapc_datecommentcost_factor_typeissnissn_electronicissn_linkissn_printjdb_idsymbol
13540 PLN540NaNSource: DOAJ12544-85522544-8552NaN0014-8261NaNPLN
62100 USD100NaNSource: DOAJ12545-31492545-3149NaN0079-4252NaNUSD
129423 EUR423NaNSource: DOAJ12605-33222605-3322NaN0212-9426NaNEUR
133200 EUR200NaNSource: DOAJ12603-59872603-5987NaN0214-9877NaNEUR
140800000 IDR800000NaNSource: DOAJ12621-11222621-1122NaN0216-3438NaNIDR
....................................
26703NaN3873.612016Source: OpenAPC (2016)10263-8762NaNNaN0263-8762NaNEUR
26704NaN2557.732017Source: OpenAPC (2017)10263-8762NaNNaN0263-8762NaNEUR
26705NaN3564.252018Source: OpenAPC (2018)10263-8762NaNNaN0263-8762NaNEUR
27923NaN1130.52019Source: OpenAPC (2019)10342-183XNaNNaN0342-183XNaNEUR
45474NaN16902020Source: OpenAPC (2020)12691-9478NaNNaNNaNNaNEUR
-

2500 rows × 11 columns

-
- - - - -```python -# merge avec issnl -cost_factor = pd.merge(cost_factor, issns, on='issn', how='left') -cost_factor -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
APC amountamountapc_datecommentcost_factor_typeissnissn_electronicissn_linkissn_printjdb_idsymbolissnl
01600 EUR1600NaNSource: DOAJ11651-20571651-20570001-55550001-5555NaNEUR0001-5555
1400 EUR400NaNSource: DOAJ12353-074X2353-074X0001-625X0001-625XNaNEUR0001-625X
21500 USD1500NaNSource: DOAJ11873-62971873-62970001-69180001-6918NaNUSD0001-6918
3520 EUR520NaNSource: DOAJ12083-94802083-94800001-69770001-6977NaNEUR0001-6977
43500 USD3500NaNSource: DOAJ12327-97882327-97880003-10620003-1062NaNUSD0003-1062
.......................................
45502NaN24002013Source: OpenAPC (2013)18756-7938NaN1520-6033NaNNaNEUR1520-6033
45503NaN1822.492014Source: OpenAPC (2014)18756-7938NaN1520-6033NaNNaNEUR1520-6033
45504NaN1762.692016Source: OpenAPC (2016)18756-7938NaN1520-6033NaNNaNEUR1520-6033
45505NaN3248.312017Source: OpenAPC (2017)18756-7938NaN1520-6033NaNNaNEUR1520-6033
45506NaN2913.112019Source: OpenAPC (2019)18756-7938NaN1520-6033NaNNaNEUR1520-6033
-

45507 rows × 12 columns

-
- - - - -```python -# test issnl -cost_factor.loc[cost_factor['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
APC amountamountapc_datecommentcost_factor_typeissnissn_electronicissn_linkissn_printjdb_idsymbolissnl
13540 PLN540NaNSource: DOAJ12544-85522544-8552NaN0014-8261NaNPLNNaN
62100 USD100NaNSource: DOAJ12545-31492545-3149NaN0079-4252NaNUSDNaN
129423 EUR423NaNSource: DOAJ12605-33222605-3322NaN0212-9426NaNEURNaN
133200 EUR200NaNSource: DOAJ12603-59872603-5987NaN0214-9877NaNEURNaN
140800000 IDR800000NaNSource: DOAJ12621-11222621-1122NaN0216-3438NaNIDRNaN
.......................................
45472NaN698.652019Source: OpenAPC (2019)12690-00092690-00092690-0009NaNNaNEURNaN
45473NaN754.672019Source: OpenAPC (2019)12690-3202NaN2690-3202NaNNaNEURNaN
45474NaN16902020Source: OpenAPC (2020)12691-9478NaNNaNNaNNaNEURNaN
45475NaN1523.22020Source: OpenAPC (2020)12699-00162699-00162699-0016NaNNaNEURNaN
45476NaN3052020Source: OpenAPC (2020)12704-61922704-61922280-18552280-1855NaNEURNaN
-

8935 rows × 12 columns

-
- - - - -```python -#ajout des issn quand ça manque -cost_factor.loc[cost_factor['issn'].isna(), 'issn'] = cost_factor['issn_print'] -cost_factor.loc[cost_factor['issn'].isna(), 'issn'] = cost_factor['issn_electronic'] -cost_factor.loc[cost_factor['issn'].isna(), 'issn'] = cost_factor['issn_link'] -cost_factor.loc[cost_factor['issn'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - -
APC amountamountapc_datecommentcost_factor_typeissnissn_electronicissn_linkissn_printjdb_idsymbolissnl
-
- - - - -```python -#ajout des issnl quand ça manque -cost_factor.loc[cost_factor['issnl'].isna(), 'issnl'] = cost_factor['issn_link'] -cost_factor.loc[cost_factor['issnl'].isna(), 'issnl'] = cost_factor['issn_print'] -cost_factor.loc[cost_factor['issnl'].isna(), 'issnl'] = cost_factor['issn_electronic'] -cost_factor.loc[cost_factor['issnl'].isna(), 'issnl'] = cost_factor['issn'] -cost_factor.loc[cost_factor['issnl'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - -
APC amountamountapc_datecommentcost_factor_typeissnissn_electronicissn_linkissn_printjdb_idsymbolissnl
-
- - - - -```python -# prendre les ids pour le merge -cost_factor_ids = cost_factor[['issn', 'issnl', 'cost_factor_type', 'amount', 'symbol', 'comment']] -# cost_factor_ids_1 = cost_factor_ids_1.rename(columns = {'issn_link' : 'issn'}) -# cost_factor_ids_2 = cost_factor.loc[cost_factor['issn_electronic'].notna()][['issn_electronic', 'cost_factor_type', 'amount', 'symbol', 'comment']] -# cost_factor_ids_2 = cost_factor_ids_2.rename(columns = {'issn_electronic' : 'issn'}) -# cost_factor_ids_3 = cost_factor.loc[cost_factor['issn_print'].notna()][['issn_print', 'cost_factor_type', 'amount', 'symbol', 'comment']] -# cost_factor_ids_3 = cost_factor_ids_3.rename(columns = {'issn_print' : 'issn'}) -# cost_factor_ids_4 = cost_factor.loc[cost_factor['issn'].notna()][['issn', 'cost_factor_type', 'amount', 'symbol', 'comment']] -# cost_factor_ids = cost_factor_ids_1.append(cost_factor_ids_2) -# cost_factor_ids = cost_factor_ids.append(cost_factor_ids_3) -# cost_factor_ids = cost_factor_ids.append(cost_factor_ids_4) -cost_factor_ids -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnlcost_factor_typeamountsymbolcomment
01651-20570001-555511600EURSource: DOAJ
12353-074X0001-625X1400EURSource: DOAJ
21873-62970001-691811500USDSource: DOAJ
32083-94800001-69771520EURSource: DOAJ
42327-97880003-106213500USDSource: DOAJ
.....................
455028756-79381520-603312400EURSource: OpenAPC (2013)
455038756-79381520-603311822.49EURSource: OpenAPC (2014)
455048756-79381520-603311762.69EURSource: OpenAPC (2016)
455058756-79381520-603313248.31EURSource: OpenAPC (2017)
455068756-79381520-603312913.11EURSource: OpenAPC (2019)
-

45507 rows × 6 columns

-
- - - - -```python -# supprimer les doublons et les vides -cost_factor_ids = cost_factor_ids.drop_duplicates(subset=['issnl']) -cost_factor_ids -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnlcost_factor_typeamountsymbolcomment
01651-20570001-555511600EURSource: DOAJ
12353-074X0001-625X1400EURSource: DOAJ
21873-62970001-691811500USDSource: DOAJ
32083-94800001-69771520EURSource: DOAJ
42327-97880003-106213500USDSource: DOAJ
.....................
454732690-32022690-32021754.67EURSource: OpenAPC (2019)
454742691-94782691-947811690EURSource: OpenAPC (2020)
454778750-75871522-160112355.13EURSource: OpenAPC (2016)
454818755-12091944-920812627.74EURSource: OpenAPC (2013)
454988756-758X1460-269512725.08EURSource: OpenAPC (2014)
-

24018 rows × 6 columns

-
- - - - -```python -# merge dans l'autre sens pour garder que les lignes du fichier -cost_factor_ids = pd.merge(cost_factor_ids, sherpa[['id', 'issnl']], on='issnl', how='left') -cost_factor_ids -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnlcost_factor_typeamountsymbolcommentid
01651-20570001-555511600EURSource: DOAJNaN
12353-074X0001-625X1400EURSource: DOAJNaN
21873-62970001-691811500USDSource: DOAJNaN
32083-94800001-69771520EURSource: DOAJNaN
42327-97880003-106213500USDSource: DOAJNaN
........................
313972690-32022690-32021754.67EURSource: OpenAPC (2019)NaN
313982691-94782691-947811690EURSource: OpenAPC (2020)NaN
313998750-75871522-160112355.13EURSource: OpenAPC (2016)NaN
314008755-12091944-920812627.74EURSource: OpenAPC (2013)NaN
314018756-758X1460-269512725.08EURSource: OpenAPC (2014)NaN
-

31402 rows × 7 columns

-
- - - - -```python -# garder les lignes avec merge -cost_factor_ids_all = cost_factor_ids.loc[cost_factor_ids['id'].notnull()] -cost_factor_ids_all -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnlcost_factor_typeamountsymbolcommentid
231083-351X0021-925812500USDSource: DOAJ1369.0
241083-351X0021-925812500USDSource: DOAJ1370.0
251083-351X0021-925812500USDSource: DOAJ1371.0
261083-351X0021-925812500USDSource: DOAJ1372.0
311536-59640025-797411950USDSource: DOAJ2147.0
........................
312972475-99532475-995312023.37EURSource: OpenAPC (2017)8591.0
312982475-99532475-995312023.37EURSource: OpenAPC (2017)8592.0
312992475-99532475-995312023.37EURSource: OpenAPC (2017)8593.0
313002475-99532475-995312023.37EURSource: OpenAPC (2017)8594.0
313012475-99532475-995312023.37EURSource: OpenAPC (2017)8595.0
-

7964 rows × 7 columns

-
- - - - -```python -# supprimer les doublons -cost_factor_ids_all = cost_factor_ids_all.drop_duplicates(subset=['id']) -cost_factor_ids_all -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnlcost_factor_typeamountsymbolcommentid
231083-351X0021-925812500USDSource: DOAJ1369.0
241083-351X0021-925812500USDSource: DOAJ1370.0
251083-351X0021-925812500USDSource: DOAJ1371.0
261083-351X0021-925812500USDSource: DOAJ1372.0
311536-59640025-797411950USDSource: DOAJ2147.0
........................
312972475-99532475-995312023.37EURSource: OpenAPC (2017)8591.0
312982475-99532475-995312023.37EURSource: OpenAPC (2017)8592.0
312992475-99532475-995312023.37EURSource: OpenAPC (2017)8593.0
313002475-99532475-995312023.37EURSource: OpenAPC (2017)8594.0
313012475-99532475-995312023.37EURSource: OpenAPC (2017)8595.0
-

7964 rows × 7 columns

-
- - - - -```python -# supprimer les doublons par issnl -cost_factor_ids_all = cost_factor_ids_all.drop_duplicates(subset=['issnl']) -del cost_factor_ids_all['id'] -cost_factor_ids_all -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnlcost_factor_typeamountsymbolcomment
231083-351X0021-925812500USDSource: DOAJ
311536-59640025-797411950USDSource: DOAJ
2221592-87210390-607812000EURSource: DOAJ
3031555-38920963-689712750USDSource: DOAJ
4021095-95721053-811913000USDSource: DOAJ
.....................
312372469-99262469-992612156.51EURSource: OpenAPC (2015)
312422469-99502469-995012143.51EURSource: OpenAPC (2016)
312482470-00102470-001011763.13EURSource: OpenAPC (2016)
312532470-00452470-004511211.45EURSource: OpenAPC (2016)
312972475-99532475-995312023.37EURSource: OpenAPC (2017)
-

580 rows × 6 columns

-
- - - - -```python -# convertir l'index en id -cost_factor_ids_all = cost_factor_ids_all.reset_index() -# ajout de l'id avec l'index + 1 -cost_factor_ids_all['cost_factor'] = cost_factor_ids_all['index'] + id_start -del cost_factor_ids_all['index'] -# convertir l'index en id -cost_factor_ids_all = cost_factor_ids_all.reset_index() -# ajout de l'id avec l'index + 1 -cost_factor_ids_all['cost_factor'] = cost_factor_ids_all['index'] + id_start -del cost_factor_ids_all['index'] -cost_factor_ids_all -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnlcost_factor_typeamountsymbolcommentcost_factor
01083-351X0021-925812500USDSource: DOAJ1
11536-59640025-797411950USDSource: DOAJ2
21592-87210390-607812000EURSource: DOAJ3
31555-38920963-689712750USDSource: DOAJ4
41095-95721053-811913000USDSource: DOAJ5
........................
5752469-99262469-992612156.51EURSource: OpenAPC (2015)576
5762469-99502469-995012143.51EURSource: OpenAPC (2016)577
5772470-00102470-001011763.13EURSource: OpenAPC (2016)578
5782470-00452470-004511211.45EURSource: OpenAPC (2016)579
5792475-99532475-995312023.37EURSource: OpenAPC (2017)580
-

580 rows × 7 columns

-
- - - - -```python -# merge avec la table sherpa -sherpa = pd.merge(sherpa, cost_factor_ids_all[['issnl', 'cost_factor']], on='issnl', how='left') -sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionsherpa_codeembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnlversionlicencecost_factor
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN10001-28151NaN355.0
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN20001-28152NaN355.0
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN30001-281531.0355.0
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN40001-281532.0355.0
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...NaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN50001-48421NaN356.0
......................................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85912475-99531NaN580.0
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85922475-99532NaN580.0
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85932475-99533NaN580.0
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85942475-995331.0580.0
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85952475-995331.0580.0
-

8595 rows × 33 columns

-
- - - - -```python -sherpa.loc[sherpa['cost_factor'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionsherpa_codeembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnlversionlicencecost_factor
937870002-95137391https://v2.sherpa.ac.uk/id/publisher_policy/11nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repositoryNaNarXiv ; bioRxiv ; Preprint RepositoryarXiv ; bioRxivNaNauthorsNaNFalseMust be assigned a DOICan not be deposited after submission to journal940002-95131NaNNaN
947870002-95137391https://v2.sherpa.ac.uk/id/publisher_policy/11nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNinstitutional_repositoryInstitutional RepositoryNaNNaNNaNpublishersNaNTrueMust link to publisher version with DOINaN950002-95132NaNNaN
957870002-95137391https://v2.sherpa.ac.uk/id/publisher_policy/11nonopublishedNaN12NaNTrueNational Institutes of Health (NIH)http://dx.doi.org/10.13039/100000002https://ror.org/01cwqze88ushttp://www.nih.gov/9.0NaNnamed_repositoryNaNPubMed CentralPubMed CentralNaNpublishersdisciplinary (PubMed Central) ;FalseMust link to publisher version with DOINaN960002-95133NaNNaN
967870002-95137391https://v2.sherpa.ac.uk/id/publisher_policy/11nonopublishedNaN12NaNTrueWellcome Trusthttp://dx.doi.org/10.13039/100004440https://ror.org/029chgv08gbhttp://www.wellcome.ac.uk/695.0NaNnamed_repositoryNaNPubMed CentralPubMed CentralNaNpublishersdisciplinary (PubMed Central) ;FalseMust link to publisher version with DOINaN970002-95133NaNNaN
977870002-95137391https://v2.sherpa.ac.uk/id/publisher_policy/11nonopublishedNaN12NaNTrueMedical Research Council (MRC)http://dx.doi.org/10.13039/501100000265https://ror.org/03x94j517gbhttp://www.mrc.ac.uk/index.htm705.0NaNnamed_repositoryNaNPubMed CentralPubMed CentralNaNpublishersdisciplinary (PubMed Central) ;FalseMust link to publisher version with DOINaN980002-95133NaNNaN
......................................................................................................
81995651661-81578459https://v2.sherpa.ac.uk/id/publisher_policy/3494noyespublishedcc_by_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; subject_repos...Any WebsitePubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNNaNNaNTruePublished source must be acknowledged with cit...NaN82001661-815738.0NaN
82005651661-81578459https://v2.sherpa.ac.uk/id/publisher_policy/3494noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; subject_repos...Any WebsitePubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNNaNNaNTruePublished source must be acknowledged with cit...NaN82011661-815732.0NaN
83735301946-623411116https://v2.sherpa.ac.uk/id/publisher_policy/3nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repositoryNaNarXiv ; bioRxiv ; Preprint RepositoryarXiv ; bioRxivNaNNaNNaNFalseMay be considered prior publication, contact j...NaN83741946-62341NaNNaN
83745301946-623411116https://v2.sherpa.ac.uk/id/publisher_policy/3nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repositoryInstitutional RepositoryAuthor's HomepageNaNNaNauthorsNaNTruePublished source must be acknowledged with DOI...NaN83751946-62342NaNNaN
83755301946-623411116https://v2.sherpa.ac.uk/id/publisher_policy/3nonoacceptedNaN6when_required_by_funderNaNNaNNaNNaNNaNNaNNaNNaNfunder_designated_location ; named_repositoryNaNFunder Designated Location ; PubMed CentralPubMed CentralNaNauthorsNaNFalseMust state on submission Funding agency requir...NaN83761946-62342NaNNaN
-

631 rows × 33 columns

-
- - - - -```python -# garder les APCs pour la version published -sherpa.loc[sherpa['article_version'] != 'published', 'cost_factor'] = np.nan -sherpa.loc[sherpa['cost_factor'].notna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionsherpa_codeembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnlversionlicencecost_factor
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN30001-281531.0355.0
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN40001-281532.0355.0
64980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNfunder_designated_location ; named_repository ...NaNFunder Designated Location ; PubMed Central ; ...PubMed CentralNaNpublishersdisciplinary (PubMed Central) ;FalseNaNNaN70001-484231.0356.0
74980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNfunder_designated_location ; named_repository ...NaNFunder Designated Location ; PubMed Central ; ...PubMed CentralNaNpublishersdisciplinary (PubMed Central) ;FalseNaNNaN80001-484232.0356.0
84980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4noyespublishedbespoke_license0NaNNaNNaNNaNNaNNaNNaNNaNNaNfunder_designated_location ; named_repository ...NaNFunder Designated Location ; PubMed Central ; ...PubMed CentralNaNpublishersdisciplinary (PubMed Central) ;FalseNaNNaN90001-484233.0356.0
......................................................................................................
85885332470-004531531https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85892470-004531.0579.0
85895332470-004531531https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85902470-004531.0579.0
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85932475-99533NaN580.0
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85942475-995331.0580.0
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85952475-995331.0580.0
-

4462 rows × 33 columns

-
# The 'cost_factor' column becomes the table's 'id'.
# Renaming the raw Sherpa id is left disabled, as in the original cell:
# cost_factor_ids_all = cost_factor_ids_all.rename(columns = {'id' : 'id_sherpa'})
cost_factor_ids_all = cost_factor_ids_all.rename({'cost_factor': 'id'}, axis='columns')
cost_factor_ids_all
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnlcost_factor_typeamountsymbolcommentid
01083-351X0021-925812500USDSource: DOAJ1
11536-59640025-797411950USDSource: DOAJ2
21592-87210390-607812000EURSource: DOAJ3
31555-38920963-689712750USDSource: DOAJ4
41095-95721053-811913000USDSource: DOAJ5
........................
5752469-99262469-992612156.51EURSource: OpenAPC (2015)576
5762469-99502469-995012143.51EURSource: OpenAPC (2016)577
5772470-00102470-001011763.13EURSource: OpenAPC (2016)578
5782470-00452470-004511211.45EURSource: OpenAPC (2016)579
5792475-99532475-995312023.37EURSource: OpenAPC (2017)580
-

580 rows × 7 columns

-
# Store the ids as integers (they are displayed as floats such as 355.0
# in the merged Sherpa table).
id_values = cost_factor_ids_all['id']
cost_factor_ids_all['id'] = id_values.astype(int)

# Display the table after the cast.
cost_factor_ids_all
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issnissnlcost_factor_typeamountsymbolcommentid
01083-351X0021-925812500USDSource: DOAJ1
11536-59640025-797411950USDSource: DOAJ2
21592-87210390-607812000EURSource: DOAJ3
31555-38920963-689712750USDSource: DOAJ4
41095-95721053-811913000USDSource: DOAJ5
........................
5752469-99262469-992612156.51EURSource: OpenAPC (2015)576
5762469-99502469-995012143.51EURSource: OpenAPC (2016)577
5772470-00102470-001011763.13EURSource: OpenAPC (2016)578
5782470-00452470-004511211.45EURSource: OpenAPC (2016)579
5792475-99532475-995312023.37EURSource: OpenAPC (2017)580
-

580 rows × 7 columns

-
# Keep only the columns that belong to the exported cost_factor table;
# the ISSN columns are dropped.
export_columns = ['id', 'cost_factor_type', 'amount', 'symbol', 'comment']
cost_factor_export = cost_factor_ids_all[export_columns]
cost_factor_export
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idcost_factor_typeamountsymbolcomment
0112500USDSource: DOAJ
1211950USDSource: DOAJ
2312000EURSource: DOAJ
3412750USDSource: DOAJ
4513000USDSource: DOAJ
..................
57557612156.51EURSource: OpenAPC (2015)
57657712143.51EURSource: OpenAPC (2016)
57757811763.13EURSource: OpenAPC (2016)
57857911211.45EURSource: OpenAPC (2016)
57958012023.37EURSource: OpenAPC (2017)
-

580 rows × 5 columns

-
import pandas as pd

# Number of cost factors collected so far (recorded output: 580).
cost_factor_export.shape[0]

# Add the "100 % discount" cost factor used by the Read & Publish licences.
# Its id is the current row count + 1, i.e. it assumes the existing ids are
# the contiguous range 1..n (true for the data shown in this notebook).
rpid = cost_factor_export.shape[0] + 1
read_publish_row = pd.DataFrame(
    [{'id': rpid, 'cost_factor_type': 2, 'amount': 100, 'symbol': '%', 'comment': 'Read & Publish agreement'}],
    columns=cost_factor_export.columns,
)
# pd.concat replaces DataFrame.append, which is deprecated and no longer
# exists in pandas 2.x; building the row with the table's own columns keeps
# the column order unchanged.
cost_factor_export = pd.concat([cost_factor_export, read_publish_row], ignore_index=True)
cost_factor_export
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idcost_factor_typeamountsymbolcomment
0112500USDSource: DOAJ
1211950USDSource: DOAJ
2312000EURSource: DOAJ
3412750USDSource: DOAJ
4513000USDSource: DOAJ
..................
57657712143.51EURSource: OpenAPC (2016)
57757811763.13EURSource: OpenAPC (2016)
57857911211.45EURSource: OpenAPC (2016)
57958012023.37EURSource: OpenAPC (2017)
5805812100%Read & Publish agreement
-

581 rows × 5 columns

-
# Every Read & Publish row points at the same cost factor: the id (rpid)
# of the Read & Publish entry added to cost_factor_export.
rp['cost_factor'] = rpid

# Display the updated Read & Publish table.
rp
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issntitlearchivingarticle_versionembargo_monthssherpa_codevalid_fromvalid_untilissnlrorjournalrp_idrp_publisherversionlicencecost_factor
01742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01Elsevier31581
11742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02Elsevier31581
21742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03Elsevier31581
31742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04Elsevier31581
41742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05Elsevier31581
...................................................
400781435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079CUP35581
400791435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080CUP35581
400801435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081CUP35581
400811435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082CUP35581
400821435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083CUP35581
-

40083 rows × 16 columns

-
import pandas as pd

# Add the UNKNOWN placeholder entry (id and type 999999, amount 0, no symbol).
unknown_row = pd.DataFrame(
    [{'id': 999999, 'cost_factor_type': 999999, 'amount': 0, 'symbol': '', 'comment': 'UNKNOWN'}],
    columns=cost_factor_export.columns,
)
# pd.concat replaces DataFrame.append, which is deprecated and no longer
# exists in pandas 2.x; building the row with the table's own columns keeps
# the column order unchanged.
cost_factor_export = pd.concat([cost_factor_export, unknown_row], ignore_index=True)
cost_factor_export
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idcost_factor_typeamountsymbolcomment
0112500USDSource: DOAJ
1211950USDSource: DOAJ
2312000EURSource: DOAJ
3412750USDSource: DOAJ
4513000USDSource: DOAJ
..................
57757811763.13EURSource: OpenAPC (2016)
57857911211.45EURSource: OpenAPC (2016)
57958012023.37EURSource: OpenAPC (2017)
5805812100%Read & Publish agreement
5819999999999990UNKNOWN
-

582 rows × 5 columns

-
# Export the cost_factor table as pretty-printed JSON records.
# The frame is serialised by pandas, re-parsed, then re-written with an
# indent of 2 and non-ASCII characters kept as-is.
result = cost_factor_export.to_json(orient='records', force_ascii=False)
parsed = json.loads(result)
with open('sample/cost_factor.json', mode='w', encoding='utf-8') as file:
    file.write(json.dumps(parsed, indent=2, ensure_ascii=False))

# Delimited-text export, without the index column.
# NOTE(review): the file is named .tsv but to_csv is called with its default
# separator (a comma) - confirm which delimiter the consumer expects.
cost_factor_export.to_csv(path_or_buf='sample/cost_factor.tsv', index=False)

# Excel export, without the index column.
cost_factor_export.to_excel(excel_writer='sample/cost_factor.xlsx', index=False)

# ## Table term

# Display the merged Sherpa table the term table is built from.
sherpa
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionsherpa_codeembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnlversionlicencecost_factor
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN10001-28151NaNNaN
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN20001-28152NaNNaN
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN30001-281531.0355.0
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN40001-281532.0355.0
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...NaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN50001-48421NaNNaN
......................................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85912475-99531NaNNaN
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85922475-99532NaNNaN
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85932475-99533NaN580.0
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85942475-995331.0580.0
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85952475-995331.0580.0
-

8595 rows × 33 columns

-
# Columns of the Sherpa table that are needed to build the term table.
# Earlier, shorter selection kept for reference:
# col_names = ['id', 'applicable_version', 'cost_factor', 'embargo', 'archiving']
term_sherpa_columns = [
    'id', 'version', 'cost_factor', 'embargo', 'archiving',
    'locations_ir', 'locations_not_ir', 'licence', 'journal',
    'conditions', 'public_notes',
    'prerequisite_funders', 'prerequisite_funders_ror',
]
term_sherpa = sherpa[term_sherpa_columns]
term_sherpa
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idversioncost_factorembargoarchivinglocations_irlocations_not_irlicencejournalconditionspublic_notesprerequisite_fundersprerequisite_funders_ror
011NaN0TrueNon-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...NaN532Must acknowledge acceptance for publication ; ...NaNNaNNaN
122NaN12TrueNon-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...NaN532Publisher source must be acknowledged with cit...NaNNaNNaN
233355.00TrueAny Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...1.0532Published source must be acknowledgedNaNNaNNaN
343355.00TrueAny Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...2.0532Published source must be acknowledgedNaNNaNNaN
451NaN0FalseNaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...NaN498Must not violate ACS ethical Guidelines ; Must...NaNNaNNaN
..........................................
859085911NaN0TrueInstitutional Repository ; Institutional WebsiteAuthor's HomepageNaN608Must link to published article ; Publisher cop...NaNNaNNaN
859185922NaN0TrueInstitutional Repository ; Institutional WebsiteAuthor's HomepageNaN608Must link to published article ; Publisher cop...NaNNaNNaN
859285933580.00TrueInstitutional Repository ; Institutional WebsiteAuthor's HomepageNaN608Must link to published article ; Publisher cop...NaNNaNNaN
859385943580.00TrueAny RepositoryJournal Website1.0608NaNNaNNaNNaN
859485953580.00TrueAny RepositoryJournal Website1.0608NaNNaNNaNNaN
-

8595 rows × 13 columns

-
# Rename the fields to the names used from here on
# (old name -> new name).
term_field_names = {
    'id': 'id_sherpa',
    'embargo': 'embargo_months',
    'prerequisite_funders_ror': 'ror',
}
term_sherpa = term_sherpa.rename(columns=term_field_names)
term_sherpa
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglocations_irlocations_not_irlicencejournalconditionspublic_notesprerequisite_fundersror
011NaN0TrueNon-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...NaN532Must acknowledge acceptance for publication ; ...NaNNaNNaN
122NaN12TrueNon-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...NaN532Publisher source must be acknowledged with cit...NaNNaNNaN
233355.00TrueAny Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...1.0532Published source must be acknowledgedNaNNaNNaN
343355.00TrueAny Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...2.0532Published source must be acknowledgedNaNNaNNaN
451NaN0FalseNaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...NaN498Must not violate ACS ethical Guidelines ; Must...NaNNaNNaN
..........................................
859085911NaN0TrueInstitutional Repository ; Institutional WebsiteAuthor's HomepageNaN608Must link to published article ; Publisher cop...NaNNaNNaN
859185922NaN0TrueInstitutional Repository ; Institutional WebsiteAuthor's HomepageNaN608Must link to published article ; Publisher cop...NaNNaNNaN
859285933580.00TrueInstitutional Repository ; Institutional WebsiteAuthor's HomepageNaN608Must link to published article ; Publisher cop...NaNNaNNaN
859385943580.00TrueAny RepositoryJournal Website1.0608NaNNaNNaNNaN
859485953580.00TrueAny RepositoryJournal Website1.0608NaNNaNNaNNaN
-

8595 rows × 13 columns

-
# Build one human-readable 'comment' per Sherpa term from three parts:
# the archiving locations, the conditions and the public notes.

# Missing text becomes '' so that the string concatenations never hit NaN.
for text_column in ('conditions', 'public_notes', 'locations_not_ir', 'locations_ir'):
    term_sherpa[text_column] = term_sherpa[text_column].fillna('')

# Label the non-empty location lists (empty ones stay '').
term_sherpa.loc[term_sherpa['locations_not_ir'] != '', 'locations_not_ir'] = 'Non institutional archiving locations: ' + term_sherpa['locations_not_ir']
term_sherpa.loc[term_sherpa['locations_ir'] != '', 'locations_ir'] = 'Institutional archiving locations: ' + term_sherpa['locations_ir']

# Location part: institutional locations when archiving is True,
# non-institutional ones when it is False, nothing otherwise.
location_part = term_sherpa['locations_not_ir'].where(term_sherpa['archiving'] == False, '')
location_part = location_part.mask(term_sherpa['archiving'] == True, term_sherpa['locations_ir'])

# Labelled conditions / public notes, '' when there is nothing to say.
conditions_part = ('Conditions: ' + term_sherpa['conditions']).where(term_sherpa['conditions'] != '', '')
notes_part = ('Public notes: ' + term_sherpa['public_notes']).where(term_sherpa['public_notes'] != '', '')

# Join only the non-empty parts. The previous chain of .loc assignments
# produced 'Conditions: X ; Conditions: X' for rows without a location,
# a dangling 'Conditions: ' for rows with no text at all, and never reached
# its final "public notes only" case; rows that do have a location get
# exactly the same text as before.
term_sherpa['comment'] = [
    ' ; '.join(part for part in parts if part)
    for parts in zip(location_part, conditions_part, notes_part)
]
term_sherpa
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglocations_irlocations_not_irlicencejournalconditionspublic_notesprerequisite_fundersrorcomment
011NaN0TrueInstitutional archiving locations: Non-Commerc...Non institutional archiving locations: Author'...NaN532Must acknowledge acceptance for publication ; ...NaNNaNInstitutional archiving locations: Non-Commerc...
122NaN12TrueInstitutional archiving locations: Non-Commerc...Non institutional archiving locations: Author'...NaN532Publisher source must be acknowledged with cit...NaNNaNInstitutional archiving locations: Non-Commerc...
233355.00TrueInstitutional archiving locations: Any Website...Non institutional archiving locations: PubMed ...1.0532Published source must be acknowledgedNaNNaNInstitutional archiving locations: Any Website...
343355.00TrueInstitutional archiving locations: Any Website...Non institutional archiving locations: PubMed ...2.0532Published source must be acknowledgedNaNNaNInstitutional archiving locations: Any Website...
451NaN0FalseNon institutional archiving locations: ChemRxi...NaN498Must not violate ACS ethical Guidelines ; Must...NaNNaNNon institutional archiving locations: ChemRxi...
.............................................
859085911NaN0TrueInstitutional archiving locations: Institution...Non institutional archiving locations: Author'...NaN608Must link to published article ; Publisher cop...NaNNaNInstitutional archiving locations: Institution...
859185922NaN0TrueInstitutional archiving locations: Institution...Non institutional archiving locations: Author'...NaN608Must link to published article ; Publisher cop...NaNNaNInstitutional archiving locations: Institution...
859285933580.00TrueInstitutional archiving locations: Institution...Non institutional archiving locations: Author'...NaN608Must link to published article ; Publisher cop...NaNNaNInstitutional archiving locations: Institution...
859385943580.00TrueInstitutional archiving locations: Any RepositoryNon institutional archiving locations: Journal...1.0608NaNNaNInstitutional archiving locations: Any Repository
859485953580.00TrueInstitutional archiving locations: Any RepositoryNon institutional archiving locations: Journal...1.0608NaNNaNInstitutional archiving locations: Any Repository
-

8595 rows × 14 columns

-
# Distribution of the funder-prerequisite flag over the Sherpa terms.
# Recorded output:
#   True    5585
#   Name: prerequisite_funders, dtype: int64
term_sherpa.prerequisite_funders.value_counts()

# Display the Read & Publish table.
rp
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
issntitlearchivingarticle_versionembargo_monthssherpa_codevalid_fromvalid_untilissnlrorjournalrp_idrp_publisherversionlicencecost_factor
01742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/04d8ztx87899.01Elsevier31581
11742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02bnkt322899.02Elsevier31581
21742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/00zg4za48899.03Elsevier31581
31742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/02s376052899.04Elsevier31581
41742-7061Acta BiomaterialiaTruepublished0cc_by2020-01-012023-12-311742-7061https://ror.org/05a28rw58899.05Elsevier31581
...................................................
400781435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/01swzsf04592.040079CUP35581
400791435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/019whta54592.040080CUP35581
400801435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/00vasag41592.040081CUP35581
400811435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05r0ap620592.040082CUP35581
400821435-8115Microscopy and MicroanalysisTruepublished60cc_by_nc_sa2021-01-012023-12-311431-9276https://ror.org/05pmsvm27592.040083CUP35581
-

40083 rows × 16 columns

-
# Columns of the Read & Publish table that feed the term table.
term_rp_columns = [
    'rp_id', 'version', 'archiving', 'embargo_months', 'cost_factor',
    'licence', 'journal', 'rp_publisher', 'ror', 'valid_from', 'valid_until',
]
# .copy() gives term_rp its own data. The following cell adds a 'comment'
# column to term_rp and deletes 'rp_publisher'; doing that on a bare column
# selection of rp is what raises pandas' SettingWithCopyWarning.
term_rp = rp[term_rp_columns].copy()
term_rp
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rp_idversionarchivingembargo_monthscost_factorlicencejournalrp_publisherrorvalid_fromvalid_until
013True05811899.0Elsevierhttps://ror.org/04d8ztx872020-01-012023-12-31
123True05811899.0Elsevierhttps://ror.org/02bnkt3222020-01-012023-12-31
233True05811899.0Elsevierhttps://ror.org/00zg4za482020-01-012023-12-31
343True05811899.0Elsevierhttps://ror.org/02s3760522020-01-012023-12-31
453True05811899.0Elsevierhttps://ror.org/05a28rw582020-01-012023-12-31
....................................
40078400793True605815592.0CUPhttps://ror.org/01swzsf042021-01-012023-12-31
40079400803True605815592.0CUPhttps://ror.org/019whta542021-01-012023-12-31
40080400813True605815592.0CUPhttps://ror.org/00vasag412021-01-012023-12-31
40081400823True605815592.0CUPhttps://ror.org/05r0ap6202021-01-012023-12-31
40082400833True605815592.0CUPhttps://ror.org/05pmsvm272021-01-012023-12-31
-

40083 rows × 11 columns

-
# Number of Read & Publish terms per publisher.
# Recorded output:
#   Elsevier           18128
#   Wiley              13905
#   Springer Nature     6716
#   CUP                  920
#   TF                   414
#   Name: rp_publisher, dtype: int64
term_rp['rp_publisher'].value_counts()

# Comment attached to the terms of each Read & Publish agreement.
rp_publisher_comments = {
    'Elsevier': 'Elsevier Read & Publish agreement',
    'Wiley': 'Wiley Read & Publish agreement',
    'TF': 'Taylor and Francis Read & Publish agreement',
    'Springer Nature': 'Springer Nature Read & Publish agreement',
    'CUP': 'Cambridge University Press (CUP) Read & Publish agreement. Article types covered: Research Articles, Review Articles, Rapid Communication, Brief Reports and Case Reports',
}

# Work on an explicit copy: term_rp is a column selection of rp, and writing
# into it directly raises SettingWithCopyWarning.
term_rp = term_rp.copy()

# The publisher name is stripped before the lookup, so 'Springer Nature' is
# matched whether or not the stored value carries the trailing space that the
# previous literal comparison ('Springer Nature ') required. Publishers that
# are not in the table get NaN, as before.
term_rp['comment'] = term_rp['rp_publisher'].str.strip().map(rp_publisher_comments)
del term_rp['rp_publisher']
term_rp
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rp_idversionarchivingembargo_monthscost_factorlicencejournalrorvalid_fromvalid_untilcomment
013True05811899.0https://ror.org/04d8ztx872020-01-012023-12-31Elsevier Read & Publish agreement
123True05811899.0https://ror.org/02bnkt3222020-01-012023-12-31Elsevier Read & Publish agreement
233True05811899.0https://ror.org/00zg4za482020-01-012023-12-31Elsevier Read & Publish agreement
343True05811899.0https://ror.org/02s3760522020-01-012023-12-31Elsevier Read & Publish agreement
453True05811899.0https://ror.org/05a28rw582020-01-012023-12-31Elsevier Read & Publish agreement
....................................
40078400793True605815592.0https://ror.org/01swzsf042021-01-012023-12-31Cambridge University Press (CUP) Read & Publis...
40079400803True605815592.0https://ror.org/019whta542021-01-012023-12-31Cambridge University Press (CUP) Read & Publis...
40080400813True605815592.0https://ror.org/00vasag412021-01-012023-12-31Cambridge University Press (CUP) Read & Publis...
40081400823True605815592.0https://ror.org/05r0ap6202021-01-012023-12-31Cambridge University Press (CUP) Read & Publis...
40082400833True605815592.0https://ror.org/05pmsvm272021-01-012023-12-31Cambridge University Press (CUP) Read & Publis...
-

40083 rows × 11 columns

-
# First half of the concatenation of the two term tables: the Sherpa terms,
# reduced to the columns kept in the combined table.
term_orig_columns = [
    'id_sherpa', 'version', 'cost_factor', 'embargo_months', 'archiving',
    'licence', 'journal', 'prerequisite_funders', 'ror', 'comment',
]
term_orig = term_sherpa[term_orig_columns]
term_orig
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcomment
011NaN0TrueNaN532NaNNaNInstitutional archiving locations: Non-Commerc...
122NaN12TrueNaN532NaNNaNInstitutional archiving locations: Non-Commerc...
233355.00True1.0532NaNNaNInstitutional archiving locations: Any Website...
343355.00True2.0532NaNNaNInstitutional archiving locations: Any Website...
451NaN0FalseNaN498NaNNaNNon institutional archiving locations: ChemRxi...
.................................
859085911NaN0TrueNaN608NaNNaNInstitutional archiving locations: Institution...
859185922NaN0TrueNaN608NaNNaNInstitutional archiving locations: Institution...
859285933580.00TrueNaN608NaNNaNInstitutional archiving locations: Institution...
859385943580.00True1.0608NaNNaNInstitutional archiving locations: Any Repository
859485953580.00True1.0608NaNNaNInstitutional archiving locations: Any Repository
-

8595 rows × 10 columns

-
import pandas as pd

# Stack the Read & Publish terms under the Sherpa terms with a fresh 0..n-1
# index. Columns present in only one of the two tables are filled with NaN,
# and sort=False keeps the column order instead of sorting it.
# pd.concat replaces DataFrame.append, which is deprecated and no longer
# exists in pandas 2.x.
term_orig = pd.concat([term_orig, term_rp], ignore_index=True, sort=False)
term_orig
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_until
01.01NaN0TrueNaN532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN
12.02NaN12TrueNaN532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN
23.03355.00True1.0532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN
34.03355.00True2.0532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN
45.01NaN0FalseNaN498.0NaNNaNNon institutional archiving locations: ChemRxi...NaNNaNNaN
..........................................
48673NaN3581.060True5.0592.0NaNhttps://ror.org/01swzsf04Cambridge University Press (CUP) Read & Publis...40079.02021-01-012023-12-31
48674NaN3581.060True5.0592.0NaNhttps://ror.org/019whta54Cambridge University Press (CUP) Read & Publis...40080.02021-01-012023-12-31
48675NaN3581.060True5.0592.0NaNhttps://ror.org/00vasag41Cambridge University Press (CUP) Read & Publis...40081.02021-01-012023-12-31
48676NaN3581.060True5.0592.0NaNhttps://ror.org/05r0ap620Cambridge University Press (CUP) Read & Publis...40082.02021-01-012023-12-31
48677NaN3581.060True5.0592.0NaNhttps://ror.org/05pmsvm27Cambridge University Press (CUP) Read & Publis...40083.02021-01-012023-12-31
-

48678 rows × 13 columns

-
- - - - -```python -# ajout d'un hash unique pour chaque variante -term_orig['id_content_hash'] = term_orig.apply(lambda x: hash(tuple(x[['version', 'cost_factor', 'embargo_months', 'archiving', 'comment']])), axis = 1) -term_orig['id_content_hash_licence'] = term_orig.apply(lambda x: hash(tuple(x[['version', 'cost_factor', 'embargo_months', 'archiving', 'licence', 'comment']])), axis = 1) -``` - - -```python -term_orig.sort_values(by='id_content_hash') -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licence
65996600.02NaN12TrueNaN923.0Truehttps://ror.org/056y81r79Institutional archiving locations: Non-Commerc...NaNNaNNaN-9213354388875732238-5975042390572407328
68676868.02NaN12TrueNaN957.0Truehttps://ror.org/056bwcz71Institutional archiving locations: Non-Commerc...NaNNaNNaN-9213354388875732238-5975042390572407328
47504751.02NaN12TrueNaN642.0Truehttps://ror.org/05w9mt194Institutional archiving locations: Non-Commerc...NaNNaNNaN-9213354388875732238-5975042390572407328
82368237.02NaN12TrueNaN640.0Truehttps://ror.org/02wxr8x18Institutional archiving locations: Non-Commerc...NaNNaNNaN-9213354388875732238-5975042390572407328
82378238.02NaN12TrueNaN640.0Truehttps://ror.org/056y81r79Institutional archiving locations: Non-Commerc...NaNNaNNaN-9213354388875732238-5975042390572407328
................................................
63536354.03222.00True1.0190.0Truehttps://ror.org/02wdwnk04Institutional archiving locations: Institution...NaNNaNNaN9219045216097074691-8427874628140339220
63526353.03222.00True1.0190.0Truehttps://ror.org/029chgv08Institutional archiving locations: Institution...NaNNaNNaN9219045216097074691-8427874628140339220
63626363.03222.00True1.0190.0Truehttps://ror.org/0472cxd90Institutional archiving locations: Institution...NaNNaNNaN9219045216097074691-8427874628140339220
63576358.03222.00True1.0190.0Truehttps://ror.org/0456r8d26Institutional archiving locations: Institution...NaNNaNNaN9219045216097074691-8427874628140339220
63636364.03222.00True1.0190.0Truehttps://ror.org/03x94j517Institutional archiving locations: Institution...NaNNaNNaN9219045216097074691-8427874628140339220
-

48678 rows × 15 columns

-
- - - - -```python -# doublons -term_orig.loc[term_orig.duplicated(subset='id_content_hash')].sort_values(by='id_content_hash') -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licence
66076608.02NaN12TrueNaN175.0Truehttps://ror.org/02wxr8x18Institutional archiving locations: Non-Commerc...NaNNaNNaN-9213354388875732238-5975042390572407328
65086509.02NaN12TrueNaN64.0Truehttps://ror.org/05w9mt194Institutional archiving locations: Non-Commerc...NaNNaNNaN-9213354388875732238-5975042390572407328
12941295.02NaN12TrueNaN342.0Truehttps://ror.org/056bwcz71Institutional archiving locations: Non-Commerc...NaNNaNNaN-9213354388875732238-5975042390572407328
55615562.02NaN12TrueNaN27.0Truehttps://ror.org/05w9mt194Institutional archiving locations: Non-Commerc...NaNNaNNaN-9213354388875732238-5975042390572407328
55595560.02NaN12TrueNaN27.0Truehttps://ror.org/056y81r79Institutional archiving locations: Non-Commerc...NaNNaNNaN-9213354388875732238-5975042390572407328
................................................
63556356.03222.00True1.0190.0Truehttps://ror.org/00cwqg982Institutional archiving locations: Institution...NaNNaNNaN9219045216097074691-8427874628140339220
63546355.03222.00True1.0190.0Truehttps://ror.org/02jkpm469Institutional archiving locations: Institution...NaNNaNNaN9219045216097074691-8427874628140339220
63536354.03222.00True1.0190.0Truehttps://ror.org/02wdwnk04Institutional archiving locations: Institution...NaNNaNNaN9219045216097074691-8427874628140339220
63646365.03222.00True1.0190.0Truehttps://ror.org/02gq0fg61Institutional archiving locations: Institution...NaNNaNNaN9219045216097074691-8427874628140339220
63596360.03222.00True1.0190.0Truehttps://ror.org/01613vh25Institutional archiving locations: Institution...NaNNaNNaN9219045216097074691-8427874628140339220
-

47358 rows × 15 columns

-
- - - - -```python -term_orig['licence'] = term_orig['licence'].fillna(999999) -term_orig['licence'] = term_orig['licence'].astype(int) -term_orig['cost_factor'] = term_orig['cost_factor'].fillna(999999) -term_orig['cost_factor'] = term_orig['cost_factor'].astype(int) -# term_orig['embargo_months'] = term_orig['embargo_months'].fillna(0) -# term_orig['embargo_months'] = term_orig['embargo_months'].astype(int) -term_orig.loc[term_orig['archiving'] == True, 'ir_archiving'] = 1 -term_orig.loc[term_orig['archiving'] == False, 'ir_archiving'] = 0 -term_orig['ir_archiving'] = term_orig['ir_archiving'].fillna(0) -term_orig -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licenceir_archiving
01.019999990True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-5068777248818105392-81946125451688170121.0
12.0299999912True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-118714631786122957710807856572614408351.0
23.033550True1532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN-6827815856646016670-44106140441472479071.0
34.033550True2532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN5388365857945903435-4928686093300740071.0
45.019999990False999999498.0NaNNaNNon institutional archiving locations: ChemRxi...NaNNaNNaN-27818217695488029669357667652881371100.0
...................................................
48673NaN358160True5592.0NaNhttps://ror.org/01swzsf04Cambridge University Press (CUP) Read & Publis...40079.02021-01-012023-12-31768737782784609585522984959422009563581.0
48674NaN358160True5592.0NaNhttps://ror.org/019whta54Cambridge University Press (CUP) Read & Publis...40080.02021-01-012023-12-31768737782784609585522984959422009563581.0
48675NaN358160True5592.0NaNhttps://ror.org/00vasag41Cambridge University Press (CUP) Read & Publis...40081.02021-01-012023-12-31768737782784609585522984959422009563581.0
48676NaN358160True5592.0NaNhttps://ror.org/05r0ap620Cambridge University Press (CUP) Read & Publis...40082.02021-01-012023-12-31768737782784609585522984959422009563581.0
48677NaN358160True5592.0NaNhttps://ror.org/05pmsvm27Cambridge University Press (CUP) Read & Publis...40083.02021-01-012023-12-31768737782784609585522984959422009563581.0
-

48678 rows × 16 columns

-
- - - - -```python -term_orig.loc[term_orig['ir_archiving'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licenceir_archiving
-
- - - - -```python -term_orig['ir_archiving'].value_counts() -``` - - - - - 1.0 47467 - 0.0 1211 - Name: ir_archiving, dtype: int64 - - - - -```python -term_orig['licence'] = term_orig['licence'].astype(int) -term_orig['ir_archiving'] = term_orig['ir_archiving'].astype(int) -term_orig['cost_factor'] = term_orig['cost_factor'].astype(int) -term_orig -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licenceir_archiving
01.019999990True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-5068777248818105392-81946125451688170121
12.0299999912True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-118714631786122957710807856572614408351
23.033550True1532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN-6827815856646016670-44106140441472479071
34.033550True2532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN5388365857945903435-4928686093300740071
45.019999990False999999498.0NaNNaNNon institutional archiving locations: ChemRxi...NaNNaNNaN-27818217695488029669357667652881371100
...................................................
48673NaN358160True5592.0NaNhttps://ror.org/01swzsf04Cambridge University Press (CUP) Read & Publis...40079.02021-01-012023-12-31768737782784609585522984959422009563581
48674NaN358160True5592.0NaNhttps://ror.org/019whta54Cambridge University Press (CUP) Read & Publis...40080.02021-01-012023-12-31768737782784609585522984959422009563581
48675NaN358160True5592.0NaNhttps://ror.org/00vasag41Cambridge University Press (CUP) Read & Publis...40081.02021-01-012023-12-31768737782784609585522984959422009563581
48676NaN358160True5592.0NaNhttps://ror.org/05r0ap620Cambridge University Press (CUP) Read & Publis...40082.02021-01-012023-12-31768737782784609585522984959422009563581
48677NaN358160True5592.0NaNhttps://ror.org/05pmsvm27Cambridge University Press (CUP) Read & Publis...40083.02021-01-012023-12-31768737782784609585522984959422009563581
-

48678 rows × 16 columns

-
- - - - -```python -terms_export_dates = term_orig.loc[(term_orig['valid_from'].notna()) | (term_orig['valid_until'].notna())][['id_content_hash', 'ror', 'valid_from', 'valid_until']] -terms_export_dates -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_content_hashrorvalid_fromvalid_until
8595-6020029623494903364https://ror.org/04d8ztx872020-01-012023-12-31
8596-6020029623494903364https://ror.org/02bnkt3222020-01-012023-12-31
8597-6020029623494903364https://ror.org/00zg4za482020-01-012023-12-31
8598-6020029623494903364https://ror.org/02s3760522020-01-012023-12-31
8599-6020029623494903364https://ror.org/05a28rw582020-01-012023-12-31
...............
486737687377827846095855https://ror.org/01swzsf042021-01-012023-12-31
486747687377827846095855https://ror.org/019whta542021-01-012023-12-31
486757687377827846095855https://ror.org/00vasag412021-01-012023-12-31
486767687377827846095855https://ror.org/05r0ap6202021-01-012023-12-31
486777687377827846095855https://ror.org/05pmsvm272021-01-012023-12-31
-

40083 rows × 4 columns

-
- - - - -```python -terms_export = term_orig[['id_sherpa', 'rp_id', 'id_content_hash', 'id_content_hash_licence', 'version', 'cost_factor', 'embargo_months', 'ir_archiving', 'licence', 'comment']] -terms_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecomment
01.0NaN-5068777248818105392-8194612545168817012199999901999999Institutional archiving locations: Non-Commerc...
12.0NaN-118714631786122957710807856572614408352999999121999999Institutional archiving locations: Non-Commerc...
23.0NaN-6827815856646016670-44106140441472479073355011Institutional archiving locations: Any Website...
34.0NaN5388365857945903435-4928686093300740073355012Institutional archiving locations: Any Website...
45.0NaN-2781821769548802966935766765288137110199999900999999Non institutional archiving locations: ChemRxi...
.................................
48673NaN40079.07687377827846095855229849594220095635835816015Cambridge University Press (CUP) Read & Publis...
48674NaN40080.07687377827846095855229849594220095635835816015Cambridge University Press (CUP) Read & Publis...
48675NaN40081.07687377827846095855229849594220095635835816015Cambridge University Press (CUP) Read & Publis...
48676NaN40082.07687377827846095855229849594220095635835816015Cambridge University Press (CUP) Read & Publis...
48677NaN40083.07687377827846095855229849594220095635835816015Cambridge University Press (CUP) Read & Publis...
-

48678 rows × 10 columns

-
- - - - -```python -# test de doublons -terms_export.loc[terms_export.duplicated(subset='id_content_hash')].sort_values(by='id_content_hash') -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecomment
66076608.0NaN-9213354388875732238-59750423905724073282999999121999999Institutional archiving locations: Non-Commerc...
65086509.0NaN-9213354388875732238-59750423905724073282999999121999999Institutional archiving locations: Non-Commerc...
12941295.0NaN-9213354388875732238-59750423905724073282999999121999999Institutional archiving locations: Non-Commerc...
55615562.0NaN-9213354388875732238-59750423905724073282999999121999999Institutional archiving locations: Non-Commerc...
55595560.0NaN-9213354388875732238-59750423905724073282999999121999999Institutional archiving locations: Non-Commerc...
.................................
63556356.0NaN9219045216097074691-84278746281403392203222011Institutional archiving locations: Institution...
63546355.0NaN9219045216097074691-84278746281403392203222011Institutional archiving locations: Institution...
63536354.0NaN9219045216097074691-84278746281403392203222011Institutional archiving locations: Institution...
63646365.0NaN9219045216097074691-84278746281403392203222011Institutional archiving locations: Institution...
63596360.0NaN9219045216097074691-84278746281403392203222011Institutional archiving locations: Institution...
-

47358 rows × 10 columns

-
- - - - -```python -terms_export_dedup = terms_export.drop_duplicates(subset=['id_content_hash']) -terms_export_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecomment
01.0NaN-5068777248818105392-8194612545168817012199999901999999Institutional archiving locations: Non-Commerc...
12.0NaN-118714631786122957710807856572614408352999999121999999Institutional archiving locations: Non-Commerc...
23.0NaN-6827815856646016670-44106140441472479073355011Institutional archiving locations: Any Website...
34.0NaN5388365857945903435-4928686093300740073355012Institutional archiving locations: Any Website...
45.0NaN-2781821769548802966935766765288137110199999900999999Non institutional archiving locations: ChemRxi...
.................................
8595NaN1.0-6020029623494903364-54358862379916614973581011Elsevier Read & Publish agreement
26723NaN18129.0-195526209948827643863594828014331812613581011NaN
33439NaN24845.0-68145539732308387052650796891404219893581011Wiley Read & Publish agreement
47344NaN38750.06747956201225830719-46487586084290985343581011Taylor and Francis Read & Publish agreement
47758NaN39164.07687377827846095855229848806545540740235816011Cambridge University Press (CUP) Read & Publis...
-

1320 rows × 10 columns

-
- - - - -```python -terms_export_dedup_licence = terms_export.drop_duplicates(subset=['id_content_hash_licence']) -terms_export_dedup_licence -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecomment
01.0NaN-5068777248818105392-8194612545168817012199999901999999Institutional archiving locations: Non-Commerc...
12.0NaN-118714631786122957710807856572614408352999999121999999Institutional archiving locations: Non-Commerc...
23.0NaN-6827815856646016670-44106140441472479073355011Institutional archiving locations: Any Website...
34.0NaN5388365857945903435-4928686093300740073355012Institutional archiving locations: Any Website...
45.0NaN-2781821769548802966935766765288137110199999900999999Non institutional archiving locations: ChemRxi...
.................................
47344NaN38750.06747956201225830719-46487586084290985343581011Taylor and Francis Read & Publish agreement
47758NaN39164.07687377827846095855229848806545540740235816011Cambridge University Press (CUP) Read & Publis...
47988NaN39394.07687377827846095855229849776618844805935816014Cambridge University Press (CUP) Read & Publis...
48218NaN39624.07687377827846095855229848607945021166535816012Cambridge University Press (CUP) Read & Publis...
48448NaN39854.07687377827846095855229849594220095635835816015Cambridge University Press (CUP) Read & Publis...
-

1590 rows × 10 columns

-
- - - - -```python -# test de doublons -terms_export_dedup_licence.loc[terms_export_dedup_licence.duplicated(subset='id_content_hash')].sort_values(by='id_content_hash') -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecomment
15691570.0NaN-9114006443623277513-72733887763620604913413002Non institutional archiving locations: PubMed ...
582583.0NaN-9011072484834895623-59116051124023388893379012Institutional archiving locations: Any Reposit...
85538554.0NaN-886163005461322845471767730880766240153573003Non institutional archiving locations: Funder ...
85528553.0NaN-886163005461322845471767734743964336903573002Non institutional archiving locations: Funder ...
82648265.0NaN-8856152899298491735-12199961119101615613560014Institutional archiving locations: Non-Commerc...
.................................
85608561.0NaN873544693264154295143320462503649956953574002Non institutional archiving locations: Funder ...
85618562.0NaN873544693264154295143320481179378659783574003Non institutional archiving locations: Funder ...
22222223.0NaN87452533838935247195211347029898937223431011Institutional archiving locations: Any Website...
41524153.0NaN884524375673695509861001134560954228313464011Institutional archiving locations: Any Website...
43514352.0NaN9036026380223066491-15394902416656550363470011Institutional archiving locations: Institution...
-

270 rows × 10 columns

-
- - - - -```python -# totaux pour les deux sources -terms_export_dedup.loc[terms_export_dedup['id_sherpa'].notna()].shape[0] -``` - - - - - 1315 - - - - -```python -terms_export_dedup.loc[terms_export_dedup['rp_id'].notna()].shape[0] -``` - - - - - 5 - - - - -```python -terms_export_dedup.loc[terms_export_dedup['rp_id'].notna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecomment
8595NaN1.0-6020029623494903364-54358862379916614973581011Elsevier Read & Publish agreement
26723NaN18129.0-195526209948827643863594828014331812613581011NaN
33439NaN24845.0-68145539732308387052650796891404219893581011Wiley Read & Publish agreement
47344NaN38750.06747956201225830719-46487586084290985343581011Taylor and Francis Read & Publish agreement
47758NaN39164.07687377827846095855229848806545540740235816011Cambridge University Press (CUP) Read & Publis...
-
- - - - -```python -# convertir l'index en id -terms_export_dedup.reset_index(inplace=True) -del terms_export_dedup['index'] -terms_export_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecomment
01.0NaN-5068777248818105392-8194612545168817012199999901999999Institutional archiving locations: Non-Commerc...
12.0NaN-118714631786122957710807856572614408352999999121999999Institutional archiving locations: Non-Commerc...
23.0NaN-6827815856646016670-44106140441472479073355011Institutional archiving locations: Any Website...
34.0NaN5388365857945903435-4928686093300740073355012Institutional archiving locations: Any Website...
45.0NaN-2781821769548802966935766765288137110199999900999999Non institutional archiving locations: ChemRxi...
.................................
1315NaN1.0-6020029623494903364-54358862379916614973581011Elsevier Read & Publish agreement
1316NaN18129.0-195526209948827643863594828014331812613581011NaN
1317NaN24845.0-68145539732308387052650796891404219893581011Wiley Read & Publish agreement
1318NaN38750.06747956201225830719-46487586084290985343581011Taylor and Francis Read & Publish agreement
1319NaN39164.07687377827846095855229848806545540740235816011Cambridge University Press (CUP) Read & Publis...
-

1320 rows × 10 columns

-
- - - - -```python -# ajout de l'id avec l'index + 1 -terms_export_dedup['id'] = terms_export_dedup.index + 1 -# del terms_export_dedup['index'] -terms_export_dedup -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecommentid
01.0NaN-5068777248818105392-8194612545168817012199999901999999Institutional archiving locations: Non-Commerc...1
12.0NaN-118714631786122957710807856572614408352999999121999999Institutional archiving locations: Non-Commerc...2
23.0NaN-6827815856646016670-44106140441472479073355011Institutional archiving locations: Any Website...3
34.0NaN5388365857945903435-4928686093300740073355012Institutional archiving locations: Any Website...4
45.0NaN-2781821769548802966935766765288137110199999900999999Non institutional archiving locations: ChemRxi...5
....................................
1315NaN1.0-6020029623494903364-54358862379916614973581011Elsevier Read & Publish agreement1316
1316NaN18129.0-195526209948827643863594828014331812613581011NaN1317
1317NaN24845.0-68145539732308387052650796891404219893581011Wiley Read & Publish agreement1318
1318NaN38750.06747956201225830719-46487586084290985343581011Taylor and Francis Read & Publish agreement1319
1319NaN39164.07687377827846095855229848806545540740235816011Cambridge University Press (CUP) Read & Publis...1320
-

1320 rows × 11 columns

-
- - - - -```python -terms_export_dedup['source'] = '' -terms_export_dedup -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - """Entry point for launching an IPython kernel. - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecommentidsource
01.0NaN-5068777248818105392-8194612545168817012199999901999999Institutional archiving locations: Non-Commerc...1
12.0NaN-118714631786122957710807856572614408352999999121999999Institutional archiving locations: Non-Commerc...2
23.0NaN-6827815856646016670-44106140441472479073355011Institutional archiving locations: Any Website...3
34.0NaN5388365857945903435-4928686093300740073355012Institutional archiving locations: Any Website...4
45.0NaN-2781821769548802966935766765288137110199999900999999Non institutional archiving locations: ChemRxi...5
.......................................
1315NaN1.0-6020029623494903364-54358862379916614973581011Elsevier Read & Publish agreement1316
1316NaN18129.0-195526209948827643863594828014331812613581011NaN1317
1317NaN24845.0-68145539732308387052650796891404219893581011Wiley Read & Publish agreement1318
1318NaN38750.06747956201225830719-46487586084290985343581011Taylor and Francis Read & Publish agreement1319
1319NaN39164.07687377827846095855229848806545540740235816011Cambridge University Press (CUP) Read & Publis...1320
-

1320 rows × 12 columns

-
- - - - -```python -# grouper par licence -terms_export_dedup_licences = terms_export_dedup_licence[['licence', 'id_content_hash']] -terms_export_dedup_licences -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
licenceid_content_hash
0999999-5068777248818105392
1999999-1187146317861229577
21-6827815856646016670
325388365857945903435
4999999-2781821769548802966
.........
4734416747956201225830719
4775817687377827846095855
4798847687377827846095855
4821827687377827846095855
4844857687377827846095855
-

1590 rows × 2 columns

-
- - - - -```python -# concat valeurs avec même id -terms_export_dedup_licences['licence'] = terms_export_dedup_licences['licence'].astype(str) -terms_export_dedup_licences = terms_export_dedup_licences.groupby('id_content_hash').agg({'licence': lambda x: ', '.join(x)}) -terms_export_dedup_licences -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
licence
id_content_hash
-9213354388875732238999999
-9200070744422558377999999
-91717831170231043951
-91349526464689481631
-91330136487514062891
......
91950013304323528931
92004661683459815431
92138788081787292532
92183892089127778822
92190452160970746911
-

1320 rows × 1 columns

-
- - - - -```python -# test des valeur multiples -terms_export_dedup_licences.loc[terms_export_dedup_licences['licence'].str.contains(',')] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
licence
id_content_hash
-91140064436232775131, 2
-90110724848348956231, 2
-88616300546132284541, 2, 3
-88561528992984917351, 4
-86071675687205191891, 4
......
87121617774363853901, 4
87354469326415429511, 2, 3
87452533838935247192, 1
88452437567369550982, 1
90360263802230664912, 1
-

185 rows × 1 columns

-
- - - - -```python -# ajout des licences groupées -terms_export_dedup_fin = pd.merge(terms_export_dedup, terms_export_dedup_licences, on='id_content_hash', how='left') -terms_export_dedup_fin -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicence_xcommentidsourcelicence_y
01.0NaN-5068777248818105392-8194612545168817012199999901999999Institutional archiving locations: Non-Commerc...1999999
12.0NaN-118714631786122957710807856572614408352999999121999999Institutional archiving locations: Non-Commerc...2999999
23.0NaN-6827815856646016670-44106140441472479073355011Institutional archiving locations: Any Website...31
34.0NaN5388365857945903435-4928686093300740073355012Institutional archiving locations: Any Website...42
45.0NaN-2781821769548802966935766765288137110199999900999999Non institutional archiving locations: ChemRxi...5999999
..........................................
1315NaN1.0-6020029623494903364-54358862379916614973581011Elsevier Read & Publish agreement13161, 2
1316NaN18129.0-195526209948827643863594828014331812613581011NaN13171, 4
1317NaN24845.0-68145539732308387052650796891404219893581011Wiley Read & Publish agreement13181, 4, 2
1318NaN38750.06747956201225830719-46487586084290985343581011Taylor and Francis Read & Publish agreement13191
1319NaN39164.07687377827846095855229848806545540740235816011Cambridge University Press (CUP) Read & Publis...13201, 4, 2, 5
-

1320 rows × 13 columns

-
- - - - -```python -# merge avec les dates pour avoir les terms ids -terms_export_dates = pd.merge(terms_export_dates, terms_export_dedup_fin[['id_content_hash', 'id']], on='id_content_hash') -terms_export_dates = terms_export_dates.rename(columns = {'id' : 'term'}) -terms_export_dates -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_content_hashrorvalid_fromvalid_untilterm
0-6020029623494903364https://ror.org/04d8ztx872020-01-012023-12-311316
1-6020029623494903364https://ror.org/02bnkt3222020-01-012023-12-311316
2-6020029623494903364https://ror.org/00zg4za482020-01-012023-12-311316
3-6020029623494903364https://ror.org/02s3760522020-01-012023-12-311316
4-6020029623494903364https://ror.org/05a28rw582020-01-012023-12-311316
..................
400787687377827846095855https://ror.org/01swzsf042021-01-012023-12-311320
400797687377827846095855https://ror.org/019whta542021-01-012023-12-311320
400807687377827846095855https://ror.org/00vasag412021-01-012023-12-311320
400817687377827846095855https://ror.org/05r0ap6202021-01-012023-12-311320
400827687377827846095855https://ror.org/05pmsvm272021-01-012023-12-311320
-

40083 rows × 5 columns

-
- - - - -```python -# renommer les champs de licence -del terms_export_dedup_fin['licence_x'] -terms_export_dedup_fin = terms_export_dedup_fin.rename(columns = {'licence_y' : 'licence'}) -``` - - -```python -terms_export_fin = terms_export_dedup_fin[['version', 'cost_factor', 'embargo_months', 'ir_archiving', 'licence', 'comment', 'id', 'source']] -terms_export_fin -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
versioncost_factorembargo_monthsir_archivinglicencecommentidsource
0199999901999999Institutional archiving locations: Non-Commerc...1
12999999121999999Institutional archiving locations: Non-Commerc...2
23355011Institutional archiving locations: Any Website...3
33355012Institutional archiving locations: Any Website...4
4199999900999999Non institutional archiving locations: ChemRxi...5
...........................
13153581011, 2Elsevier Read & Publish agreement1316
13163581011, 4NaN1317
13173581011, 4, 2Wiley Read & Publish agreement1318
13183581011Taylor and Francis Read & Publish agreement1319
131935816011, 4, 2, 5Cambridge University Press (CUP) Read & Publis...1320
-

1320 rows × 8 columns

-
- - - - -```python -# export de la table -result = terms_export_fin.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/term.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export csv -terms_export_fin.to_csv('sample/term.tsv', index=False) -``` - - -```python -# export excel -terms_export_fin.to_excel('sample/term.xlsx', index=False) -``` - -## Table condition_type - - -```python -# Journal-only, Organization-only, Journal-organization agreement -col_names = ['id', - 'condition_issuer' - ] -condition_type = pd.DataFrame(columns = col_names) -condition_type = condition_type.append({'id' : 1, 'condition_issuer' : 'Journal-only'}, ignore_index=True) -condition_type = condition_type.append({'id' : 2, 'condition_issuer' : 'Organization-only'}, ignore_index=True) -condition_type = condition_type.append({'id' : 3, 'condition_issuer' : 'Journal-organization agreement'}, ignore_index=True) -condition_type -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - -
idcondition_issuer
01Journal-only
12Organization-only
23Journal-organization agreement
-
- - - - -```python -# export de la table -result = condition_type.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/condition_type.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export csv -condition_type.to_csv('sample/condition_type.tsv', index=False) -``` - - -```python -# export excel -condition_type.to_excel('sample/condition_type.xlsx', index=False) -``` - -## Table organization - - -```python -# extraction des organizations (funders) -sherpa -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionsherpa_codeembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnlversionlicencecost_factor
05320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTrueMust acknowledge acceptance for publication ; ...NaN10001-28151NaNNaN
15320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/2050nonoacceptedNaN12NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; named_repository ; non_comm...Non-Commercial Institutional RepositoryAuthor's Homepage ; arXiv ; AgEcon ; PhilPaper...arXiv ; AgEcon ; PhilPapers ; PubMed Central ;...NaNNaNNaNTruePublisher source must be acknowledged with cit...NaN20001-28152NaNNaN
25320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; institutional_repository ; named...Any Website ; Institutional RepositoryPubMed Central ; Subject Repository ; Journal ...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN30001-281531.0355.0
35320001-281511905https://v2.sherpa.ac.uk/id/publisher_policy/3315noyespublishedcc_by_nc_nd0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_website ; named_repository ; non_commercia...Any Website ; Non-Commercial Institutional Rep...PubMed Central ; Non-Commercial Subject Reposi...PubMed CentralNaNauthorsdisciplinary (PubMed Central) ;TruePublished source must be acknowledgedNaN40001-281532.0355.0
44980001-48427760https://v2.sherpa.ac.uk/id/publisher_policy/4nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNnamed_repository ; preprint_repository ; subje...NaNChemRxiv ; bioRxiv ; arXiv ; Preprint Reposito...ChemRxiv ; bioRxiv ; arXivNaNNaNNaNFalseMust not violate ACS ethical Guidelines ; Must...NaN50001-48421NaNNaN
......................................................................................................
85906082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonosubmittedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85912475-99531NaNNaN
85916082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonoacceptedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85922475-99532NaNNaN
85926082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10nonopublishedNaN0NaNNaNNaNNaNNaNNaNNaNNaNNaNauthors_homepage ; institutional_repository ; ...Institutional Repository ; Institutional WebsiteAuthor's HomepageNaNNaNNaNNaNTrueMust link to published article ; Publisher cop...NaN85932475-99533NaN580.0
85936082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85942475-995331.0580.0
85946082475-995333503https://v2.sherpa.ac.uk/id/publisher_policy/10noyespublishedcc_by0NaNNaNNaNNaNNaNNaNNaNNaNNaNany_repository ; this_journalAny RepositoryJournal WebsiteNaNNaNNaNNaNTrueNaNNaN85952475-995331.0580.0
-

8595 rows × 33 columns

-
- - - - -```python -sherpa.loc[sherpa['prerequisite_funders'].notna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journalissnsherpa_idsherpa_uriopen_access_prohibitedadditional_oa_feearticle_versionsherpa_codeembargoprerequisitesprerequisite_fundersprerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_idprerequisite_subjectslocationlocations_irlocations_not_irnamed_repositorynamed_academic_social_networkcopyright_ownerpublisher_depositarchivingconditionspublic_notesidissnlversionlicencecost_factor
167890001-49664049https://v2.sherpa.ac.uk/id/publisher_policy/126nonopublishedNaN12NaNTrueNational Institutes of Health (NIH)http://dx.doi.org/10.13039/100000002https://ror.org/01cwqze88ushttp://www.nih.gov/9.0NaNnamed_repositoryNaNPubMed CentralPubMed CentralNaNNaNdisciplinary (PubMed Central) ;FalseNaNNaN170001-49663NaN357.0
286680002-07291334https://v2.sherpa.ac.uk/id/publisher_policy/1107nonoacceptedNaN12NaNTrueNational Institutes of Health (NIH)http://dx.doi.org/10.13039/100000002https://ror.org/01cwqze88ushttp://www.nih.gov/9.0NaNnamed_repositoryNaNPubMed CentralPubMed CentralNaNNaNdisciplinary (PubMed Central) ;FalseNaNNaN290002-07292NaNNaN
589850002-934312950https://v2.sherpa.ac.uk/id/publisher_policy/3323noyespublishedcc_by0NaNTrueWellcome Trusthttp://dx.doi.org/10.13039/100004440https://ror.org/029chgv08gbhttp://www.wellcome.ac.uk/695.0NaNinstitutional_repository ; named_repository ; ...Institutional RepositoryPubMed Central ; Research for Development Repo...PubMed Central ; Research for Development Repo...NaNNaNdisciplinary (PubMed Central) ;TruePublished source must be acknowledged with cit...NaN590002-934331.0223.0
599850002-934312950https://v2.sherpa.ac.uk/id/publisher_policy/3323noyespublishedcc_by0NaNTrueBritish Heart Foundation (BHF)http://dx.doi.org/10.13039/501100000274https://ror.org/02wdwnk04gbhttp://www.bhf.org.uk/18.0NaNinstitutional_repository ; named_repository ; ...Institutional RepositoryPubMed Central ; Research for Development Repo...PubMed Central ; Research for Development Repo...NaNNaNdisciplinary (PubMed Central) ;TruePublished source must be acknowledged with cit...NaN600002-934331.0223.0
609850002-934312950https://v2.sherpa.ac.uk/id/publisher_policy/3323noyespublishedcc_by0NaNTrueVersus Arthritishttp://dx.doi.org/10.13039/501100000341https://ror.org/02jkpm469gbhttps://www.versusarthritis.org/14.0NaNinstitutional_repository ; named_repository ; ...Institutional RepositoryPubMed Central ; Research for Development Repo...PubMed Central ; Research for Development Repo...NaNNaNdisciplinary (PubMed Central) ;TruePublished source must be acknowledged with cit...NaN610002-934331.0223.0
......................................................................................................
85109902211-285520490https://v2.sherpa.ac.uk/id/publisher_policy/3323noyespublishedcc_by0NaNTrueEuropean Research Council (ERC)http://dx.doi.org/10.13039/501100000781https://ror.org/0472cxd90behttp://erc.europa.eu/31.0NaNinstitutional_repository ; named_repository ; ...Institutional RepositoryPubMed Central ; Research for Development Repo...PubMed Central ; Research for Development Repo...NaNNaNdisciplinary (PubMed Central) ;TruePublished source must be acknowledged with cit...NaN85112211-285531.0352.0
85119902211-285520490https://v2.sherpa.ac.uk/id/publisher_policy/3323noyespublishedcc_by0NaNTrueMedical Research Council (MRC)http://dx.doi.org/10.13039/501100000265https://ror.org/03x94j517gbhttp://www.mrc.ac.uk/index.htm705.0NaNinstitutional_repository ; named_repository ; ...Institutional RepositoryPubMed Central ; Research for Development Repo...PubMed Central ; Research for Development Repo...NaNNaNdisciplinary (PubMed Central) ;TruePublished source must be acknowledged with cit...NaN85122211-285531.0352.0
85129902211-285520490https://v2.sherpa.ac.uk/id/publisher_policy/3323noyespublishedcc_by0NaNTrueMotor Neuron Disease Association (MND Associat...http://dx.doi.org/10.13039/501100000406https://ror.org/02gq0fg61gbhttp://www.mndassociation.org/562.0NaNinstitutional_repository ; named_repository ; ...Institutional RepositoryPubMed Central ; Research for Development Repo...PubMed Central ; Research for Development Repo...NaNNaNdisciplinary (PubMed Central) ;TruePublished source must be acknowledged with cit...NaN85132211-285531.0352.0
85139902211-285520490https://v2.sherpa.ac.uk/id/publisher_policy/3323noyespublishedcc_by0NaNTrueParkinson's UKhttp://dx.doi.org/10.13039/501100000304https://ror.org/02417p338gbhttp://www.parkinsons.org.uk/411.0NaNinstitutional_repository ; named_repository ; ...Institutional RepositoryPubMed Central ; Research for Development Repo...PubMed Central ; Research for Development Repo...NaNNaNdisciplinary (PubMed Central) ;TruePublished source must be acknowledged with cit...NaN85142211-285531.0352.0
85149902211-285520490https://v2.sherpa.ac.uk/id/publisher_policy/3323noyespublishedcc_by0NaNTrueTelethon Foundationhttp://dx.doi.org/10.13039/501100002426https://ror.org/04xraxn18ithttps://www.telethon.it/en/325.0NaNinstitutional_repository ; named_repository ; ...Institutional RepositoryPubMed Central ; Research for Development Repo...PubMed Central ; Research for Development Repo...NaNNaNdisciplinary (PubMed Central) ;TruePublished source must be acknowledged with cit...NaN85152211-285531.0352.0
-

5585 rows × 33 columns

-
- - - - -```python -sherpa['prerequisite_funders'].value_counts() -``` - - - - - True 5585 - Name: prerequisite_funders, dtype: int64 - - - - -```python -funders = sherpa.loc[sherpa['prerequisite_funders'].notna()][['prerequisite_funders_name', 'prerequisite_funders_fundref', 'prerequisite_funders_ror', 'prerequisite_funders_country', 'prerequisite_funders_url', 'prerequisite_funders_sherpa_id']] -funders -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
prerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_id
16National Institutes of Health (NIH)http://dx.doi.org/10.13039/100000002https://ror.org/01cwqze88ushttp://www.nih.gov/9.0
28National Institutes of Health (NIH)http://dx.doi.org/10.13039/100000002https://ror.org/01cwqze88ushttp://www.nih.gov/9.0
58Wellcome Trusthttp://dx.doi.org/10.13039/100004440https://ror.org/029chgv08gbhttp://www.wellcome.ac.uk/695.0
59British Heart Foundation (BHF)http://dx.doi.org/10.13039/501100000274https://ror.org/02wdwnk04gbhttp://www.bhf.org.uk/18.0
60Versus Arthritishttp://dx.doi.org/10.13039/501100000341https://ror.org/02jkpm469gbhttps://www.versusarthritis.org/14.0
.....................
8510European Research Council (ERC)http://dx.doi.org/10.13039/501100000781https://ror.org/0472cxd90behttp://erc.europa.eu/31.0
8511Medical Research Council (MRC)http://dx.doi.org/10.13039/501100000265https://ror.org/03x94j517gbhttp://www.mrc.ac.uk/index.htm705.0
8512Motor Neuron Disease Association (MND Associat...http://dx.doi.org/10.13039/501100000406https://ror.org/02gq0fg61gbhttp://www.mndassociation.org/562.0
8513Parkinson's UKhttp://dx.doi.org/10.13039/501100000304https://ror.org/02417p338gbhttp://www.parkinsons.org.uk/411.0
8514Telethon Foundationhttp://dx.doi.org/10.13039/501100002426https://ror.org/04xraxn18ithttps://www.telethon.it/en/325.0
-

5585 rows × 6 columns

-
- - - - -```python -funders_dedup = funders.drop_duplicates(subset='prerequisite_funders_ror') -funders_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
prerequisite_funders_nameprerequisite_funders_fundrefprerequisite_funders_rorprerequisite_funders_countryprerequisite_funders_urlprerequisite_funders_sherpa_id
16National Institutes of Health (NIH)http://dx.doi.org/10.13039/100000002https://ror.org/01cwqze88ushttp://www.nih.gov/9.0
58Wellcome Trusthttp://dx.doi.org/10.13039/100004440https://ror.org/029chgv08gbhttp://www.wellcome.ac.uk/695.0
59British Heart Foundation (BHF)http://dx.doi.org/10.13039/501100000274https://ror.org/02wdwnk04gbhttp://www.bhf.org.uk/18.0
60Versus Arthritishttp://dx.doi.org/10.13039/501100000341https://ror.org/02jkpm469gbhttps://www.versusarthritis.org/14.0
61Biotechnology and Biological Sciences Research...http://dx.doi.org/10.13039/501100000268https://ror.org/00cwqg982gbhttp://www.bbsrc.ac.uk/home/home.aspx709.0
62Blood Cancer UKhttp://dx.doi.org/10.13039/501100007903https://ror.org/0055acf80gbhttps://bloodcancer.org.uk/925.0
63Bill & Melinda Gates Foundationhttp://dx.doi.org/10.13039/100000865https://ror.org/0456r8d26ushttp://www.gatesfoundation.org/961.0
64Cancer Research UKhttp://dx.doi.org/10.13039/501100000289https://ror.org/054225q67gbhttp://www.cancerresearchuk.org/19.0
65Chief Scientist Office, Scottish Executive (CSO)http://dx.doi.org/10.13039/501100000589https://ror.org/01613vh25gbhttp://www.cso.scot.nhs.uk/16.0
66Department of Health (DH)http://dx.doi.org/10.13039/501100000272https://ror.org/0187kwz08gbhttp://www.dh.gov.uk/en/index.htm943.0
67Dunhill Medical Trust (DMT)http://dx.doi.org/10.13039/501100000377https://ror.org/05ayqqv15gbhttps://dunhillmedical.org.uk/410.0
68European Research Council (ERC)http://dx.doi.org/10.13039/501100000781https://ror.org/0472cxd90behttp://erc.europa.eu/31.0
69Medical Research Council (MRC)http://dx.doi.org/10.13039/501100000265https://ror.org/03x94j517gbhttp://www.mrc.ac.uk/index.htm705.0
70Motor Neuron Disease Association (MND Associat...http://dx.doi.org/10.13039/501100000406https://ror.org/02gq0fg61gbhttp://www.mndassociation.org/562.0
71Parkinson's UKhttp://dx.doi.org/10.13039/501100000304https://ror.org/02417p338gbhttp://www.parkinsons.org.uk/411.0
72Telethon Foundationhttp://dx.doi.org/10.13039/501100002426https://ror.org/04xraxn18ithttps://www.telethon.it/en/325.0
99Howard Hughes Medical Institute (HHMI)http://dx.doi.org/10.13039/100000011https://ror.org/006w34k90ushttp://www.hhmi.org/24.0
149Arts and Humanities Research Council (AHRC)http://dx.doi.org/10.13039/501100000267https://ror.org/0505m1554gbhttp://www.ahrc.ac.uk/Pages/Home.aspx698.0
150Austrian Science Fund (FWF)http://dx.doi.org/10.13039/501100002428https://ror.org/013tf3c58athttp://www.fwf.ac.at/en/13.0
153Breast Cancer Nowhttp://dx.doi.org/10.13039/501100007913https://ror.org/02qa92s63gbhttp://breastcancernow.org/1065.0
156Engineering and Physical Sciences Research Cou...http://dx.doi.org/10.13039/501100000266https://ror.org/0439y7842gbhttp://www.epsrc.ac.uk/Pages/default.aspx722.0
159Natural Environment Research Council (NERC)http://dx.doi.org/10.13039/501100000270https://ror.org/02b5d8509gbhttps://nerc.ukri.org/726.0
162Science and Technology Facilities Council (STFC)http://dx.doi.org/10.13039/501100000271https://ror.org/057g20z61gbhttp://www.stfc.ac.uk/716.0
164Vetenskapsrådethttp://dx.doi.org/10.13039/501100004359https://ror.org/03zttf063sehttp://www.vr.se/302.0
165World Health Organization (WHO)http://dx.doi.org/10.13039/100004423https://ror.org/01f80g185chhttp://www.who.int/903.0
166World Bankhttp://dx.doi.org/10.13039/100004421https://ror.org/00ae7jd04ushttp://www.worldbank.org/525.0
167Yorkshire Cancer Researchhttp://dx.doi.org/10.13039/501100002653https://ror.org/02cddnn97gbhttp://www.yorkshirecancerresearch.org.uk/428.0
169Economic and Social Research Council (ESRC)http://dx.doi.org/10.13039/501100000269https://ror.org/03n0ht308gbhttp://www.esrc.ac.uk/717.0
418Higher Education Funding Council for England (...http://dx.doi.org/10.13039/501100000384https://ror.org/02wxr8x18gbhttp://www.hefce.ac.uk/877.0
419Higher Education Funding Council for Wales (HE...http://dx.doi.org/10.13039/501100000383https://ror.org/056y81r79gbhttp://www.hefcw.ac.uk/home/home.aspx881.0
420Scottish Funding Council (SFC)http://dx.doi.org/10.13039/501100000360https://ror.org/056bwcz71gbhttp://www.sfc.ac.uk/887.0
421Department for the Economy, Northern Irelandhttp://dx.doi.org/10.13039/100008303https://ror.org/05w9mt194gbhttps://www.economy-ni.gov.uk/884.0
960Academy of Finlandhttp://dx.doi.org/10.13039/501100002341https://ror.org/05k73zm37fihttps://www.aka.fi/en/1248.0
961Agence Nationale de la Recherche (ANR)http://dx.doi.org/10.13039/501100001665https://ror.org/00rbzpz17frhttp://www.agence-nationale-recherche.fr/30.0
963Fundação para a Ciência e a Tecnologiahttp://dx.doi.org/10.13039/501100001871https://ror.org/00snfqn58pthttp://www.fct.pt/1109.0
964Formashttp://dx.doi.org/10.13039/501100001862https://ror.org/03pjs1y45sehttp://www.formas.se/452.0
967Nederlandse Organisatie voor Wetenschappelijk ...http://dx.doi.org/10.13039/501100003246https://ror.org/04jsz6e67nlhttp://www.nwo.nl/459.0
968Science Foundation Ireland (SFI)http://dx.doi.org/10.13039/501100001602https://ror.org/0271asj38iehttp://www.sfi.ie/210.0
970Research Council of Norwayhttp://dx.doi.org/10.13039/501100005416https://ror.org/00epmv149nohttps://www.forskningsradet.no/en/266.0
971Forskningsrådet för hälsa, arbetsliv och välfä...http://dx.doi.org/10.13039/501100006636https://ror.org/02d290r06sehttp://www.forte.se/455.0
978Innovate UKhttp://dx.doi.org/10.13039/501100000266https://ror.org/05ar5fy68gbhttps://www.gov.uk/government/organisations/in...1267.0
1048Diabetes UKhttp://dx.doi.org/10.13039/501100000361https://ror.org/050rgn017gbhttp://www.diabetes.org.uk/492.0
1052Marie Curiehttp://dx.doi.org/10.13039/501100000654https://ror.org/02aqv1x10gbhttp://www.mariecurie.org.uk/595.0
1055Action on Hearing Losshttp://dx.doi.org/10.13039/501100000703https://ror.org/05w6qh410gbhttp://www.actiononhearingloss.org.uk/412.0
1056Alzheimer's Societyhttp://dx.doi.org/10.13039/501100000320https://ror.org/0472gwq90gbhttp://alzheimers.org.uk/443.0
1063Multiple Sclerosis Societyhttp://dx.doi.org/10.13039/501100000381https://ror.org/043fwdk81gbhttp://www.mssociety.org.uk/745.0
1064Myrovlytis Trusthttp://dx.doi.org/10.13039/501100001291https://ror.org/05bj02613gbhttp://www.myrovlytistrust.org/858.0
1065National Centre for the Replacement, Refinemen...http://dx.doi.org/10.13039/501100000849https://ror.org/02w0kg036gbhttp://www.nc3rs.org.uk/859.0
1072Worldwide Cancer Reseachhttp://dx.doi.org/10.13039/100004423https://ror.org/031tfbz57gbhttp://www.worldwidecancerresearch.org/425.0
2219Canadian Institutes of Health Research (CIHR)http://dx.doi.org/10.13039/501100000024https://ror.org/01gavpb45cahttp://www.cihr-irsc.gc.ca/28.0
5490US Department of Energy (DOE)http://dx.doi.org/10.13039/100000015https://ror.org/01bj3aw27ushttp://energy.gov/962.0
5491Agency for Healthcare Research and Quality (AHRQ)http://dx.doi.org/10.13039/100000133https://ror.org/03jmfdf59ushttp://www.ahrq.gov/index.html981.0
5492Institute of Education Sciences (IES)http://dx.doi.org/10.13039/100005246https://ror.org/04et59085ushttp://ies.ed.gov/291.0
5493National Aeronautics and Space Administration ...http://dx.doi.org/10.13039/100000104https://ror.org/027ka1x80ushttp://science.nasa.gov/986.0
5494National Science Foundation (NSF)http://dx.doi.org/10.13039/100000001https://ror.org/021nxhr62ushttp://www.nsf.gov/354.0
7232Academy of Medical Sciencehttp://dx.doi.org/10.13039/501100000691https://ror.org/00c489v88gbhttps://acmedsci.ac.uk/1125.0
7239Prostate Cancer UKhttp://dx.doi.org/10.13039/501100000771https://ror.org/04dkv6329gbhttp://prostatecanceruk.org/742.0
7240Schweizerischer Nationalfonds zur Förderung de...http://dx.doi.org/10.13039/501100001711https://ror.org/00yjd3n13chhttp://www.snf.ch/de/Seiten/default.aspx25.0
-
- - - - -```python -funders_dedup.shape[0] -``` - - - - - 58 - - - - -```python -# export excel -funders_dedup.to_excel('sample/funders.xlsx', index=False) -``` - - -```python -# export csv -funders_dedup.to_csv('sample/funders.tsv', index=False) -``` - - -```python -# creation du DF -organization_funders = funders_dedup -organization_funders = organization_funders.rename(columns = {'prerequisite_funders_name' : 'name', - 'prerequisite_funders_fundref' : 'fundref', - 'prerequisite_funders_ror' : 'ror', - 'prerequisite_funders_country' : 'iso_code', - 'prerequisite_funders_url' : 'website', - 'prerequisite_funders_sherpa_id' : 'sherpa_id' - }) -organization_funders -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namefundrefroriso_codewebsitesherpa_id
16National Institutes of Health (NIH)http://dx.doi.org/10.13039/100000002https://ror.org/01cwqze88ushttp://www.nih.gov/9.0
58Wellcome Trusthttp://dx.doi.org/10.13039/100004440https://ror.org/029chgv08gbhttp://www.wellcome.ac.uk/695.0
59British Heart Foundation (BHF)http://dx.doi.org/10.13039/501100000274https://ror.org/02wdwnk04gbhttp://www.bhf.org.uk/18.0
60Versus Arthritishttp://dx.doi.org/10.13039/501100000341https://ror.org/02jkpm469gbhttps://www.versusarthritis.org/14.0
61Biotechnology and Biological Sciences Research...http://dx.doi.org/10.13039/501100000268https://ror.org/00cwqg982gbhttp://www.bbsrc.ac.uk/home/home.aspx709.0
62Blood Cancer UKhttp://dx.doi.org/10.13039/501100007903https://ror.org/0055acf80gbhttps://bloodcancer.org.uk/925.0
63Bill & Melinda Gates Foundationhttp://dx.doi.org/10.13039/100000865https://ror.org/0456r8d26ushttp://www.gatesfoundation.org/961.0
64Cancer Research UKhttp://dx.doi.org/10.13039/501100000289https://ror.org/054225q67gbhttp://www.cancerresearchuk.org/19.0
65Chief Scientist Office, Scottish Executive (CSO)http://dx.doi.org/10.13039/501100000589https://ror.org/01613vh25gbhttp://www.cso.scot.nhs.uk/16.0
66Department of Health (DH)http://dx.doi.org/10.13039/501100000272https://ror.org/0187kwz08gbhttp://www.dh.gov.uk/en/index.htm943.0
67Dunhill Medical Trust (DMT)http://dx.doi.org/10.13039/501100000377https://ror.org/05ayqqv15gbhttps://dunhillmedical.org.uk/410.0
68European Research Council (ERC)http://dx.doi.org/10.13039/501100000781https://ror.org/0472cxd90behttp://erc.europa.eu/31.0
69Medical Research Council (MRC)http://dx.doi.org/10.13039/501100000265https://ror.org/03x94j517gbhttp://www.mrc.ac.uk/index.htm705.0
70Motor Neuron Disease Association (MND Associat...http://dx.doi.org/10.13039/501100000406https://ror.org/02gq0fg61gbhttp://www.mndassociation.org/562.0
71Parkinson's UKhttp://dx.doi.org/10.13039/501100000304https://ror.org/02417p338gbhttp://www.parkinsons.org.uk/411.0
72Telethon Foundationhttp://dx.doi.org/10.13039/501100002426https://ror.org/04xraxn18ithttps://www.telethon.it/en/325.0
99Howard Hughes Medical Institute (HHMI)http://dx.doi.org/10.13039/100000011https://ror.org/006w34k90ushttp://www.hhmi.org/24.0
149Arts and Humanities Research Council (AHRC)http://dx.doi.org/10.13039/501100000267https://ror.org/0505m1554gbhttp://www.ahrc.ac.uk/Pages/Home.aspx698.0
150Austrian Science Fund (FWF)http://dx.doi.org/10.13039/501100002428https://ror.org/013tf3c58athttp://www.fwf.ac.at/en/13.0
153Breast Cancer Nowhttp://dx.doi.org/10.13039/501100007913https://ror.org/02qa92s63gbhttp://breastcancernow.org/1065.0
156Engineering and Physical Sciences Research Cou...http://dx.doi.org/10.13039/501100000266https://ror.org/0439y7842gbhttp://www.epsrc.ac.uk/Pages/default.aspx722.0
159Natural Environment Research Council (NERC)http://dx.doi.org/10.13039/501100000270https://ror.org/02b5d8509gbhttps://nerc.ukri.org/726.0
162Science and Technology Facilities Council (STFC)http://dx.doi.org/10.13039/501100000271https://ror.org/057g20z61gbhttp://www.stfc.ac.uk/716.0
164Vetenskapsrådethttp://dx.doi.org/10.13039/501100004359https://ror.org/03zttf063sehttp://www.vr.se/302.0
165World Health Organization (WHO)http://dx.doi.org/10.13039/100004423https://ror.org/01f80g185chhttp://www.who.int/903.0
166World Bankhttp://dx.doi.org/10.13039/100004421https://ror.org/00ae7jd04ushttp://www.worldbank.org/525.0
167Yorkshire Cancer Researchhttp://dx.doi.org/10.13039/501100002653https://ror.org/02cddnn97gbhttp://www.yorkshirecancerresearch.org.uk/428.0
169Economic and Social Research Council (ESRC)http://dx.doi.org/10.13039/501100000269https://ror.org/03n0ht308gbhttp://www.esrc.ac.uk/717.0
418Higher Education Funding Council for England (...http://dx.doi.org/10.13039/501100000384https://ror.org/02wxr8x18gbhttp://www.hefce.ac.uk/877.0
419Higher Education Funding Council for Wales (HE...http://dx.doi.org/10.13039/501100000383https://ror.org/056y81r79gbhttp://www.hefcw.ac.uk/home/home.aspx881.0
420Scottish Funding Council (SFC)http://dx.doi.org/10.13039/501100000360https://ror.org/056bwcz71gbhttp://www.sfc.ac.uk/887.0
421Department for the Economy, Northern Irelandhttp://dx.doi.org/10.13039/100008303https://ror.org/05w9mt194gbhttps://www.economy-ni.gov.uk/884.0
960Academy of Finlandhttp://dx.doi.org/10.13039/501100002341https://ror.org/05k73zm37fihttps://www.aka.fi/en/1248.0
961Agence Nationale de la Recherche (ANR)http://dx.doi.org/10.13039/501100001665https://ror.org/00rbzpz17frhttp://www.agence-nationale-recherche.fr/30.0
963Fundação para a Ciência e a Tecnologiahttp://dx.doi.org/10.13039/501100001871https://ror.org/00snfqn58pthttp://www.fct.pt/1109.0
964Formashttp://dx.doi.org/10.13039/501100001862https://ror.org/03pjs1y45sehttp://www.formas.se/452.0
967Nederlandse Organisatie voor Wetenschappelijk ...http://dx.doi.org/10.13039/501100003246https://ror.org/04jsz6e67nlhttp://www.nwo.nl/459.0
968Science Foundation Ireland (SFI)http://dx.doi.org/10.13039/501100001602https://ror.org/0271asj38iehttp://www.sfi.ie/210.0
970Research Council of Norwayhttp://dx.doi.org/10.13039/501100005416https://ror.org/00epmv149nohttps://www.forskningsradet.no/en/266.0
971Forskningsrådet för hälsa, arbetsliv och välfä...http://dx.doi.org/10.13039/501100006636https://ror.org/02d290r06sehttp://www.forte.se/455.0
978Innovate UKhttp://dx.doi.org/10.13039/501100000266https://ror.org/05ar5fy68gbhttps://www.gov.uk/government/organisations/in...1267.0
1048Diabetes UKhttp://dx.doi.org/10.13039/501100000361https://ror.org/050rgn017gbhttp://www.diabetes.org.uk/492.0
1052Marie Curiehttp://dx.doi.org/10.13039/501100000654https://ror.org/02aqv1x10gbhttp://www.mariecurie.org.uk/595.0
1055Action on Hearing Losshttp://dx.doi.org/10.13039/501100000703https://ror.org/05w6qh410gbhttp://www.actiononhearingloss.org.uk/412.0
1056Alzheimer's Societyhttp://dx.doi.org/10.13039/501100000320https://ror.org/0472gwq90gbhttp://alzheimers.org.uk/443.0
1063Multiple Sclerosis Societyhttp://dx.doi.org/10.13039/501100000381https://ror.org/043fwdk81gbhttp://www.mssociety.org.uk/745.0
1064Myrovlytis Trusthttp://dx.doi.org/10.13039/501100001291https://ror.org/05bj02613gbhttp://www.myrovlytistrust.org/858.0
1065National Centre for the Replacement, Refinemen...http://dx.doi.org/10.13039/501100000849https://ror.org/02w0kg036gbhttp://www.nc3rs.org.uk/859.0
1072Worldwide Cancer Reseachhttp://dx.doi.org/10.13039/100004423https://ror.org/031tfbz57gbhttp://www.worldwidecancerresearch.org/425.0
2219Canadian Institutes of Health Research (CIHR)http://dx.doi.org/10.13039/501100000024https://ror.org/01gavpb45cahttp://www.cihr-irsc.gc.ca/28.0
5490US Department of Energy (DOE)http://dx.doi.org/10.13039/100000015https://ror.org/01bj3aw27ushttp://energy.gov/962.0
5491Agency for Healthcare Research and Quality (AHRQ)http://dx.doi.org/10.13039/100000133https://ror.org/03jmfdf59ushttp://www.ahrq.gov/index.html981.0
5492Institute of Education Sciences (IES)http://dx.doi.org/10.13039/100005246https://ror.org/04et59085ushttp://ies.ed.gov/291.0
5493National Aeronautics and Space Administration ...http://dx.doi.org/10.13039/100000104https://ror.org/027ka1x80ushttp://science.nasa.gov/986.0
5494National Science Foundation (NSF)http://dx.doi.org/10.13039/100000001https://ror.org/021nxhr62ushttp://www.nsf.gov/354.0
7232Academy of Medical Sciencehttp://dx.doi.org/10.13039/501100000691https://ror.org/00c489v88gbhttps://acmedsci.ac.uk/1125.0
7239Prostate Cancer UKhttp://dx.doi.org/10.13039/501100000771https://ror.org/04dkv6329gbhttp://prostatecanceruk.org/742.0
7240Schweizerischer Nationalfonds zur Förderung de...http://dx.doi.org/10.13039/501100001711https://ror.org/00yjd3n13chhttp://www.snf.ch/de/Seiten/default.aspx25.0
-
- - - - -```python -# lien avec les pays -country = pd.read_csv('sample/country.tsv', encoding='utf-8', header=0, sep='\t') -country -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
nameiso_codeid
0AfghanistanAF1
1AlbaniaAL2
2AlgeriaDZ3
3American SamoaAS4
4AndorraAD5
............
246ZambiaZM247
247ZimbabweZW248
248Åland IslandsAX249
249International AgencyOI250
250UNKNOWN__999999
-

251 rows × 3 columns

-
- - - - -```python -# merge avec les pays -organization_funders['iso_code'] = organization_funders['iso_code'].str.upper() -organization_funders['is_funder'] = 1 -organization_funders = pd.merge(organization_funders, country[['iso_code', 'id']], how='left', on='iso_code') -organization_funders -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namefundrefroriso_codewebsitesherpa_idis_funderid
0National Institutes of Health (NIH)http://dx.doi.org/10.13039/100000002https://ror.org/01cwqze88UShttp://www.nih.gov/9.01236
1Wellcome Trusthttp://dx.doi.org/10.13039/100004440https://ror.org/029chgv08GBhttp://www.wellcome.ac.uk/695.01234
2British Heart Foundation (BHF)http://dx.doi.org/10.13039/501100000274https://ror.org/02wdwnk04GBhttp://www.bhf.org.uk/18.01234
3Versus Arthritishttp://dx.doi.org/10.13039/501100000341https://ror.org/02jkpm469GBhttps://www.versusarthritis.org/14.01234
4Biotechnology and Biological Sciences Research...http://dx.doi.org/10.13039/501100000268https://ror.org/00cwqg982GBhttp://www.bbsrc.ac.uk/home/home.aspx709.01234
5Blood Cancer UKhttp://dx.doi.org/10.13039/501100007903https://ror.org/0055acf80GBhttps://bloodcancer.org.uk/925.01234
6Bill & Melinda Gates Foundationhttp://dx.doi.org/10.13039/100000865https://ror.org/0456r8d26UShttp://www.gatesfoundation.org/961.01236
7Cancer Research UKhttp://dx.doi.org/10.13039/501100000289https://ror.org/054225q67GBhttp://www.cancerresearchuk.org/19.01234
8Chief Scientist Office, Scottish Executive (CSO)http://dx.doi.org/10.13039/501100000589https://ror.org/01613vh25GBhttp://www.cso.scot.nhs.uk/16.01234
9Department of Health (DH)http://dx.doi.org/10.13039/501100000272https://ror.org/0187kwz08GBhttp://www.dh.gov.uk/en/index.htm943.01234
10Dunhill Medical Trust (DMT)http://dx.doi.org/10.13039/501100000377https://ror.org/05ayqqv15GBhttps://dunhillmedical.org.uk/410.01234
11European Research Council (ERC)http://dx.doi.org/10.13039/501100000781https://ror.org/0472cxd90BEhttp://erc.europa.eu/31.0121
12Medical Research Council (MRC)http://dx.doi.org/10.13039/501100000265https://ror.org/03x94j517GBhttp://www.mrc.ac.uk/index.htm705.01234
13Motor Neuron Disease Association (MND Associat...http://dx.doi.org/10.13039/501100000406https://ror.org/02gq0fg61GBhttp://www.mndassociation.org/562.01234
14Parkinson's UKhttp://dx.doi.org/10.13039/501100000304https://ror.org/02417p338GBhttp://www.parkinsons.org.uk/411.01234
15Telethon Foundationhttp://dx.doi.org/10.13039/501100002426https://ror.org/04xraxn18IThttps://www.telethon.it/en/325.01110
16Howard Hughes Medical Institute (HHMI)http://dx.doi.org/10.13039/100000011https://ror.org/006w34k90UShttp://www.hhmi.org/24.01236
17Arts and Humanities Research Council (AHRC)http://dx.doi.org/10.13039/501100000267https://ror.org/0505m1554GBhttp://www.ahrc.ac.uk/Pages/Home.aspx698.01234
18Austrian Science Fund (FWF)http://dx.doi.org/10.13039/501100002428https://ror.org/013tf3c58AThttp://www.fwf.ac.at/en/13.0114
19Breast Cancer Nowhttp://dx.doi.org/10.13039/501100007913https://ror.org/02qa92s63GBhttp://breastcancernow.org/1065.01234
20Engineering and Physical Sciences Research Cou...http://dx.doi.org/10.13039/501100000266https://ror.org/0439y7842GBhttp://www.epsrc.ac.uk/Pages/default.aspx722.01234
21Natural Environment Research Council (NERC)http://dx.doi.org/10.13039/501100000270https://ror.org/02b5d8509GBhttps://nerc.ukri.org/726.01234
22Science and Technology Facilities Council (STFC)http://dx.doi.org/10.13039/501100000271https://ror.org/057g20z61GBhttp://www.stfc.ac.uk/716.01234
23Vetenskapsrådethttp://dx.doi.org/10.13039/501100004359https://ror.org/03zttf063SEhttp://www.vr.se/302.01214
24World Health Organization (WHO)http://dx.doi.org/10.13039/100004423https://ror.org/01f80g185CHhttp://www.who.int/903.01215
25World Bankhttp://dx.doi.org/10.13039/100004421https://ror.org/00ae7jd04UShttp://www.worldbank.org/525.01236
26Yorkshire Cancer Researchhttp://dx.doi.org/10.13039/501100002653https://ror.org/02cddnn97GBhttp://www.yorkshirecancerresearch.org.uk/428.01234
27Economic and Social Research Council (ESRC)http://dx.doi.org/10.13039/501100000269https://ror.org/03n0ht308GBhttp://www.esrc.ac.uk/717.01234
28Higher Education Funding Council for England (...http://dx.doi.org/10.13039/501100000384https://ror.org/02wxr8x18GBhttp://www.hefce.ac.uk/877.01234
29Higher Education Funding Council for Wales (HE...http://dx.doi.org/10.13039/501100000383https://ror.org/056y81r79GBhttp://www.hefcw.ac.uk/home/home.aspx881.01234
30Scottish Funding Council (SFC)http://dx.doi.org/10.13039/501100000360https://ror.org/056bwcz71GBhttp://www.sfc.ac.uk/887.01234
31Department for the Economy, Northern Irelandhttp://dx.doi.org/10.13039/100008303https://ror.org/05w9mt194GBhttps://www.economy-ni.gov.uk/884.01234
32Academy of Finlandhttp://dx.doi.org/10.13039/501100002341https://ror.org/05k73zm37FIhttps://www.aka.fi/en/1248.0175
33Agence Nationale de la Recherche (ANR)http://dx.doi.org/10.13039/501100001665https://ror.org/00rbzpz17FRhttp://www.agence-nationale-recherche.fr/30.0176
34Fundação para a Ciência e a Tecnologiahttp://dx.doi.org/10.13039/501100001871https://ror.org/00snfqn58PThttp://www.fct.pt/1109.01178
35Formashttp://dx.doi.org/10.13039/501100001862https://ror.org/03pjs1y45SEhttp://www.formas.se/452.01214
36Nederlandse Organisatie voor Wetenschappelijk ...http://dx.doi.org/10.13039/501100003246https://ror.org/04jsz6e67NLhttp://www.nwo.nl/459.01156
37Science Foundation Ireland (SFI)http://dx.doi.org/10.13039/501100001602https://ror.org/0271asj38IEhttp://www.sfi.ie/210.01107
38Research Council of Norwayhttp://dx.doi.org/10.13039/501100005416https://ror.org/00epmv149NOhttps://www.forskningsradet.no/en/266.01166
39Forskningsrådet för hälsa, arbetsliv och välfä...http://dx.doi.org/10.13039/501100006636https://ror.org/02d290r06SEhttp://www.forte.se/455.01214
40Innovate UKhttp://dx.doi.org/10.13039/501100000266https://ror.org/05ar5fy68GBhttps://www.gov.uk/government/organisations/in...1267.01234
41Diabetes UKhttp://dx.doi.org/10.13039/501100000361https://ror.org/050rgn017GBhttp://www.diabetes.org.uk/492.01234
42Marie Curiehttp://dx.doi.org/10.13039/501100000654https://ror.org/02aqv1x10GBhttp://www.mariecurie.org.uk/595.01234
43Action on Hearing Losshttp://dx.doi.org/10.13039/501100000703https://ror.org/05w6qh410GBhttp://www.actiononhearingloss.org.uk/412.01234
44Alzheimer's Societyhttp://dx.doi.org/10.13039/501100000320https://ror.org/0472gwq90GBhttp://alzheimers.org.uk/443.01234
45Multiple Sclerosis Societyhttp://dx.doi.org/10.13039/501100000381https://ror.org/043fwdk81GBhttp://www.mssociety.org.uk/745.01234
46Myrovlytis Trusthttp://dx.doi.org/10.13039/501100001291https://ror.org/05bj02613GBhttp://www.myrovlytistrust.org/858.01234
47National Centre for the Replacement, Refinemen...http://dx.doi.org/10.13039/501100000849https://ror.org/02w0kg036GBhttp://www.nc3rs.org.uk/859.01234
48Worldwide Cancer Reseachhttp://dx.doi.org/10.13039/100004423https://ror.org/031tfbz57GBhttp://www.worldwidecancerresearch.org/425.01234
49Canadian Institutes of Health Research (CIHR)http://dx.doi.org/10.13039/501100000024https://ror.org/01gavpb45CAhttp://www.cihr-irsc.gc.ca/28.0140
50US Department of Energy (DOE)http://dx.doi.org/10.13039/100000015https://ror.org/01bj3aw27UShttp://energy.gov/962.01236
51Agency for Healthcare Research and Quality (AHRQ)http://dx.doi.org/10.13039/100000133https://ror.org/03jmfdf59UShttp://www.ahrq.gov/index.html981.01236
52Institute of Education Sciences (IES)http://dx.doi.org/10.13039/100005246https://ror.org/04et59085UShttp://ies.ed.gov/291.01236
53National Aeronautics and Space Administration ...http://dx.doi.org/10.13039/100000104https://ror.org/027ka1x80UShttp://science.nasa.gov/986.01236
54National Science Foundation (NSF)http://dx.doi.org/10.13039/100000001https://ror.org/021nxhr62UShttp://www.nsf.gov/354.01236
55Academy of Medical Sciencehttp://dx.doi.org/10.13039/501100000691https://ror.org/00c489v88GBhttps://acmedsci.ac.uk/1125.01234
56Prostate Cancer UKhttp://dx.doi.org/10.13039/501100000771https://ror.org/04dkv6329GBhttp://prostatecanceruk.org/742.01234
57Schweizerischer Nationalfonds zur Förderung de...http://dx.doi.org/10.13039/501100001711https://ror.org/00yjd3n13CHhttp://www.snf.ch/de/Seiten/default.aspx25.01215
-
- - - - -```python -organization_funders = organization_funders.rename(columns = {'id' : 'country'}) -organization_funders -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namefundrefroriso_codewebsitesherpa_idis_fundercountry
0National Institutes of Health (NIH)http://dx.doi.org/10.13039/100000002https://ror.org/01cwqze88UShttp://www.nih.gov/9.01236
1Wellcome Trusthttp://dx.doi.org/10.13039/100004440https://ror.org/029chgv08GBhttp://www.wellcome.ac.uk/695.01234
2British Heart Foundation (BHF)http://dx.doi.org/10.13039/501100000274https://ror.org/02wdwnk04GBhttp://www.bhf.org.uk/18.01234
3Versus Arthritishttp://dx.doi.org/10.13039/501100000341https://ror.org/02jkpm469GBhttps://www.versusarthritis.org/14.01234
4Biotechnology and Biological Sciences Research...http://dx.doi.org/10.13039/501100000268https://ror.org/00cwqg982GBhttp://www.bbsrc.ac.uk/home/home.aspx709.01234
5Blood Cancer UKhttp://dx.doi.org/10.13039/501100007903https://ror.org/0055acf80GBhttps://bloodcancer.org.uk/925.01234
6Bill & Melinda Gates Foundationhttp://dx.doi.org/10.13039/100000865https://ror.org/0456r8d26UShttp://www.gatesfoundation.org/961.01236
7Cancer Research UKhttp://dx.doi.org/10.13039/501100000289https://ror.org/054225q67GBhttp://www.cancerresearchuk.org/19.01234
8Chief Scientist Office, Scottish Executive (CSO)http://dx.doi.org/10.13039/501100000589https://ror.org/01613vh25GBhttp://www.cso.scot.nhs.uk/16.01234
9Department of Health (DH)http://dx.doi.org/10.13039/501100000272https://ror.org/0187kwz08GBhttp://www.dh.gov.uk/en/index.htm943.01234
10Dunhill Medical Trust (DMT)http://dx.doi.org/10.13039/501100000377https://ror.org/05ayqqv15GBhttps://dunhillmedical.org.uk/410.01234
11European Research Council (ERC)http://dx.doi.org/10.13039/501100000781https://ror.org/0472cxd90BEhttp://erc.europa.eu/31.0121
12Medical Research Council (MRC)http://dx.doi.org/10.13039/501100000265https://ror.org/03x94j517GBhttp://www.mrc.ac.uk/index.htm705.01234
13Motor Neuron Disease Association (MND Associat...http://dx.doi.org/10.13039/501100000406https://ror.org/02gq0fg61GBhttp://www.mndassociation.org/562.01234
14Parkinson's UKhttp://dx.doi.org/10.13039/501100000304https://ror.org/02417p338GBhttp://www.parkinsons.org.uk/411.01234
15Telethon Foundationhttp://dx.doi.org/10.13039/501100002426https://ror.org/04xraxn18IThttps://www.telethon.it/en/325.01110
16Howard Hughes Medical Institute (HHMI)http://dx.doi.org/10.13039/100000011https://ror.org/006w34k90UShttp://www.hhmi.org/24.01236
17Arts and Humanities Research Council (AHRC)http://dx.doi.org/10.13039/501100000267https://ror.org/0505m1554GBhttp://www.ahrc.ac.uk/Pages/Home.aspx698.01234
18Austrian Science Fund (FWF)http://dx.doi.org/10.13039/501100002428https://ror.org/013tf3c58AThttp://www.fwf.ac.at/en/13.0114
19Breast Cancer Nowhttp://dx.doi.org/10.13039/501100007913https://ror.org/02qa92s63GBhttp://breastcancernow.org/1065.01234
20Engineering and Physical Sciences Research Cou...http://dx.doi.org/10.13039/501100000266https://ror.org/0439y7842GBhttp://www.epsrc.ac.uk/Pages/default.aspx722.01234
21Natural Environment Research Council (NERC)http://dx.doi.org/10.13039/501100000270https://ror.org/02b5d8509GBhttps://nerc.ukri.org/726.01234
22Science and Technology Facilities Council (STFC)http://dx.doi.org/10.13039/501100000271https://ror.org/057g20z61GBhttp://www.stfc.ac.uk/716.01234
23Vetenskapsrådethttp://dx.doi.org/10.13039/501100004359https://ror.org/03zttf063SEhttp://www.vr.se/302.01214
24World Health Organization (WHO)http://dx.doi.org/10.13039/100004423https://ror.org/01f80g185CHhttp://www.who.int/903.01215
25World Bankhttp://dx.doi.org/10.13039/100004421https://ror.org/00ae7jd04UShttp://www.worldbank.org/525.01236
26Yorkshire Cancer Researchhttp://dx.doi.org/10.13039/501100002653https://ror.org/02cddnn97GBhttp://www.yorkshirecancerresearch.org.uk/428.01234
27Economic and Social Research Council (ESRC)http://dx.doi.org/10.13039/501100000269https://ror.org/03n0ht308GBhttp://www.esrc.ac.uk/717.01234
28Higher Education Funding Council for England (...http://dx.doi.org/10.13039/501100000384https://ror.org/02wxr8x18GBhttp://www.hefce.ac.uk/877.01234
29Higher Education Funding Council for Wales (HE...http://dx.doi.org/10.13039/501100000383https://ror.org/056y81r79GBhttp://www.hefcw.ac.uk/home/home.aspx881.01234
30Scottish Funding Council (SFC)http://dx.doi.org/10.13039/501100000360https://ror.org/056bwcz71GBhttp://www.sfc.ac.uk/887.01234
31Department for the Economy, Northern Irelandhttp://dx.doi.org/10.13039/100008303https://ror.org/05w9mt194GBhttps://www.economy-ni.gov.uk/884.01234
32Academy of Finlandhttp://dx.doi.org/10.13039/501100002341https://ror.org/05k73zm37FIhttps://www.aka.fi/en/1248.0175
33Agence Nationale de la Recherche (ANR)http://dx.doi.org/10.13039/501100001665https://ror.org/00rbzpz17FRhttp://www.agence-nationale-recherche.fr/30.0176
34Fundação para a Ciência e a Tecnologiahttp://dx.doi.org/10.13039/501100001871https://ror.org/00snfqn58PThttp://www.fct.pt/1109.01178
35Formashttp://dx.doi.org/10.13039/501100001862https://ror.org/03pjs1y45SEhttp://www.formas.se/452.01214
36Nederlandse Organisatie voor Wetenschappelijk ...http://dx.doi.org/10.13039/501100003246https://ror.org/04jsz6e67NLhttp://www.nwo.nl/459.01156
37Science Foundation Ireland (SFI)http://dx.doi.org/10.13039/501100001602https://ror.org/0271asj38IEhttp://www.sfi.ie/210.01107
38Research Council of Norwayhttp://dx.doi.org/10.13039/501100005416https://ror.org/00epmv149NOhttps://www.forskningsradet.no/en/266.01166
39Forskningsrådet för hälsa, arbetsliv och välfä...http://dx.doi.org/10.13039/501100006636https://ror.org/02d290r06SEhttp://www.forte.se/455.01214
40Innovate UKhttp://dx.doi.org/10.13039/501100000266https://ror.org/05ar5fy68GBhttps://www.gov.uk/government/organisations/in...1267.01234
41Diabetes UKhttp://dx.doi.org/10.13039/501100000361https://ror.org/050rgn017GBhttp://www.diabetes.org.uk/492.01234
42Marie Curiehttp://dx.doi.org/10.13039/501100000654https://ror.org/02aqv1x10GBhttp://www.mariecurie.org.uk/595.01234
43Action on Hearing Losshttp://dx.doi.org/10.13039/501100000703https://ror.org/05w6qh410GBhttp://www.actiononhearingloss.org.uk/412.01234
44Alzheimer's Societyhttp://dx.doi.org/10.13039/501100000320https://ror.org/0472gwq90GBhttp://alzheimers.org.uk/443.01234
45Multiple Sclerosis Societyhttp://dx.doi.org/10.13039/501100000381https://ror.org/043fwdk81GBhttp://www.mssociety.org.uk/745.01234
46Myrovlytis Trusthttp://dx.doi.org/10.13039/501100001291https://ror.org/05bj02613GBhttp://www.myrovlytistrust.org/858.01234
47National Centre for the Replacement, Refinemen...http://dx.doi.org/10.13039/501100000849https://ror.org/02w0kg036GBhttp://www.nc3rs.org.uk/859.01234
48Worldwide Cancer Reseachhttp://dx.doi.org/10.13039/100004423https://ror.org/031tfbz57GBhttp://www.worldwidecancerresearch.org/425.01234
49Canadian Institutes of Health Research (CIHR)http://dx.doi.org/10.13039/501100000024https://ror.org/01gavpb45CAhttp://www.cihr-irsc.gc.ca/28.0140
50US Department of Energy (DOE)http://dx.doi.org/10.13039/100000015https://ror.org/01bj3aw27UShttp://energy.gov/962.01236
51Agency for Healthcare Research and Quality (AHRQ)http://dx.doi.org/10.13039/100000133https://ror.org/03jmfdf59UShttp://www.ahrq.gov/index.html981.01236
52Institute of Education Sciences (IES)http://dx.doi.org/10.13039/100005246https://ror.org/04et59085UShttp://ies.ed.gov/291.01236
53National Aeronautics and Space Administration ...http://dx.doi.org/10.13039/100000104https://ror.org/027ka1x80UShttp://science.nasa.gov/986.01236
54National Science Foundation (NSF)http://dx.doi.org/10.13039/100000001https://ror.org/021nxhr62UShttp://www.nsf.gov/354.01236
55Academy of Medical Sciencehttp://dx.doi.org/10.13039/501100000691https://ror.org/00c489v88GBhttps://acmedsci.ac.uk/1125.01234
56Prostate Cancer UKhttp://dx.doi.org/10.13039/501100000771https://ror.org/04dkv6329GBhttp://prostatecanceruk.org/742.01234
57Schweizerischer Nationalfonds zur Förderung de...http://dx.doi.org/10.13039/501100001711https://ror.org/00yjd3n13CHhttp://www.snf.ch/de/Seiten/default.aspx25.01215
-
- - - - -```python -# ajout des organizations suisses -organization = pd.read_csv('ror/ror_ch_hei_export.tsv', encoding='utf-8', header=0, sep='\t', dtype={'fundref': str, 'orgref': str}, na_filter=False) -organization -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rornamelabel_enlabel_frlabel_delabel_itwebsitecountrystarting_yearis_funderacronymaliasesisnifundreforgrefwikidatagrid
0https://ror.org/032ymzc07University of Applied Sciences of the GrisonsFachhochschule Graubündenhttps://www.fhgr.ch/en/21519630Hochschule für Technik und Wirtschaft Chur0000 0000 8718 2812Q1622220grid.460104.7
1https://ror.org/04mq2g308University of Applied Sciences and Arts Northw...http://www.fhnw.ch/homepage21520060FHNWFachhochschule Nordwestschweiz0000 0001 1497 8091grid.410380.e
2https://ror.org/0210tb741Forschungsinstitut für biologischen Landbau (F...https://www.fibl.org/en/germany/location-de.html2150FiBLgrid.506220.3
3https://ror.org/007ygn379Graduate Institute of International and Develo...Institut de Hautes études Internationales et d...Hochschulinstitut für internationale Studien u...http://graduateinstitute.ch/home.html21519270IHEIDGraduate Institute Geneva0000 0001 2296 987314744053Q691686grid.424404.2
4https://ror.org/01xkakk17University of Applied Sciences and Arts Wester...Haute École Spécialisée de Suisse OccidentaleFachhochschule Westschweizhttp://www.hes-so.ch/en/homepage-hes-so-1679.html21519980HES-SO0000 0001 0943 199910128956Q168003grid.5681.a
5https://ror.org/015pmkr43Haute École Pédagogique BEJUNE (HEP BEJUNE)http://www.hep-bejune.ch/21520010HEP BEJUNE0000 0001 0658 3479grid.469449.2
6https://ror.org/048gre751Haute École Pédagogique Fribourg (HEP-PH FR)https://www.hepfr.ch/21519900HEP-PH FR0000 0001 0266 4909grid.469451.b
7https://ror.org/01bvm0h13Haute École Pédagogique du Canton de Vaud (HEP...http://www.hepl.ch/cms/accueil.html21520010HEP Vaud0000 0004 0613 4050grid.466224.0
8https://ror.org/02ejkey04Zurich University of Applied Sciences in Busin...Hochschule für Wirtschaft Zürichhttp://www.fh-hwz.ch/en21519860HWZ0000 0001 0008 371330805829Q1488771grid.449909.9
9https://ror.org/04nd0xd48Lucerne University of Applied Sciences and ArtsHaute École de lucerneHochschule Luzernhttps://www.hslu.ch/en/215199700000 0001 2191 894319480920Q664028grid.425064.1
10https://ror.org/00w9q2c06University of Applied Sciences of Special Need...Interkantonale Hochschule für Heilpädagogikhttp://www.hfh.ch/en/21519240HfHZurich Training College for Teachers of Specia...0000 0001 0710 6332grid.466279.8
11https://ror.org/049c2kr37Kalaidos University of Applied Sciences (Kalai...Kalaidos Fachhochschulehttps://www.kalaidos-fh.ch/de-CH21519950Kalaidos UAS0000 0004 0453 90546746630Q681372grid.449532.d
12https://ror.org/021f7p178Lib4RI - Library for the Research Institutes w...http://www.lib4ri.ch/21520110Lib4RI0000 0004 0624 8541Q1278450grid.458352.d
13https://ror.org/00p9jf779Medicines for Malaria Venture (MMV)http://www.mmv.org/21519990MMV0000 0004 0432 5267501100004167Q6806774grid.452605.0
14https://ror.org/038mj2660Ostschweizer Fachhochschule OSTEastern Switzerland University of Applied Scie...https://www.ost.ch/21519990grid.510272.3
15https://ror.org/01awgk221Zurich University of Teacher Education (PHZH)Pädagogische Hochschule Zürichhttps://phzh.ch/en/21520020PHZHPH Zürich0000 0000 9666 1858grid.483054.e
16https://ror.org/05jf1ma54Pädagogische Hochschule BernBern University of Teacher Educationhttps://www.phbern.ch21520050PHBern0000 0000 8585 5665grid.454333.6
17https://ror.org/02fjgft97Pädagogische Hochschule Graubünden (PHGR)Alta scuola pedagogica dei Grigionihttp://www.phgr.ch/2150PHGR0000 0000 9317 283Xgrid.469478.0
18https://ror.org/0235ynq74University of Teacher Education LucernePädagogische Hochschule Luzernhttp://www.phlu.ch/ute-lucerne/21520030PH Luzern0000 0001 0348 1637grid.465965.d
19https://ror.org/03fs41j10Pädagogische Hochschule Schaffhausen (PHSH)http://www.phsh.ch/21520030PHSH0000 0004 0450 7546grid.466133.5
20https://ror.org/00rqdn375Schwyz University of Teacher Education (PHSZ)Pädagogische Hochschule Schwyzhttps://www.phsz.ch/en/2150PHSZPHZ Schwyz0000 0004 0613 7454grid.466169.a
21https://ror.org/05m37v666St.Gallen University of Teacher Education (PHSG)Pädagogische Hochschule St. Gallenhttps://www.phsg.ch/en21520070PHSG0000 0001 0271 5139Q1768652grid.466208.e
22https://ror.org/04bf6dq94Pädagogische Hochschule Thurgau (PHTG)http://www.phtg.ch/home/21520030PHTG0000 0004 0613 3824grid.466322.7
23https://ror.org/05a28rw58ETH Zurich (ETH Zurich)École Polytechnique Fédérale de ZurichEidgenössische Technische Hochschule ZürichPolitecnico federale di Zurigohttps://www.ethz.ch/en.html21518550ETH ZurichSwiss Federal Institute of Technology in Zuric...0000 0001 2156 2780501100003006210910Q11942grid.5801.c
24https://ror.org/02s376052École Polytechnique Fédérale de Lausanne (EPFL)Swiss Federal Institute of Technology in Lausannehttp://www.epfl.ch/index.en.html21518530EPFL000000012183904950110000170371968Q262760grid.5333.6
25https://ror.org/00zg4za48Swiss Federal Institute for Vocational Educati...Institut Fédéral des Hautes Études en Formatio...Eidgenössisches Hochschulinstitut für Berufsbi...http://www.ehb-schweiz.ch/en/21520070SFIVET0000 0001 2285 5681Q1302632grid.466173.1
26https://ror.org/01ggx4157European Organization for Nuclear Research (CERN)Organisation européenne pour la recherche nucl...Europäische Organisation für Kernforschunghttp://home.web.cern.ch/21519540CERN0000 0001 2156 142X37351Q42944grid.9132.9
27https://ror.org/02bnkt322Bern University of Applied Sciences (BFH)Haute école spécialisée bernoiseBerner Fachhochschulehttp://www.bfh.ch/en/home.html21519970BFH0000 0001 0688 67795011000062594365265Q466455grid.424060.4
28https://ror.org/04d8ztx87Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...215185000000 0004 4681 910XQ397466grid.417771.3
29https://ror.org/02crff812University of Zurich (UZH)Université de zurichUniversität ZürichUniversità di Zurigohttp://www.uzh.ch/index_en.html21518330UZH0000 0004 1937 0650501100006447314803Q206702grid.7400.3
30https://ror.org/022fs9h90University of FribourgUniversité de FribourgUniversität FreiburgUniversità di Friburgohttp://www.unifr.ch/home/welcomeE.php215188900000 0004 0478 1713501100005869535267Q36188grid.8534.a
31https://ror.org/01swzsf04University of Geneva (UNIGE)Université de GenèveUniversità di Ginevrahttps://www.unige.ch/21515590UNIGESchola Genevensis0000 0001 2322 4988501100006389342348Q503473grid.8591.5
32https://ror.org/019whta54University of Lausanne (UNIL)Université de LausanneUniversität LausanneUniversità di Losannahttp://www.unil.ch/central/en/home.html21515370UNILSchola Lausannensis0000 0001 2165 420450110000639079810Q658975grid.9851.5
33https://ror.org/00vasag41University of NeuchâtelUniversité de neuchâtelUniversität Neuenburghttp://www2.unine.ch/215183800000 0001 2297 77185011000053533662101Q541548grid.10711.36
34https://ror.org/05r0ap620Zurich University of the ArtsHaute École d'Art de ZurichZürcher Hochschule der Künstehttps://www.zhdk.ch/2152007039250592Q222450grid.449912.3
35https://ror.org/05pmsvm27Zurich University of Applied Sciences (ZHAW)Zürcher Hochschule für Angewandte Wissenschaftenhttps://www.zhaw.ch/en/university/21520070ZHAW000000012229164430930550Q2605554grid.19739.35
36https://ror.org/05ghhx264University of Teacher Education Zug (PH Zug)Pädagogische Hochschule Zughttps://www.zg.ch/behoerden/direktion-fur-bild...21520130PH Zug0000 0004 0449 2225grid.466274.5
37https://ror.org/03mcsbr76Swiss Ornithological InstituteSchweizerische Vogelwartehttp://www.vogelwarte.ch/de/home/215192400000 0001 1512 3677Q663638grid.419767.a
38https://ror.org/05ep8g269University of Applied Sciences and Arts of Sou...Scuola Universitaria Professionale della Svizz...http://www.supsi.ch/home_en.html21519970SUPSI000000012325223334066841Q663984grid.16058.3a
39https://ror.org/03c4atk17Universita della Svizzera Italiana (USI)University of Italian SwitzerlandUniversité de la suisse italienneUniversità della Svizzera italianahttp://www.usi.ch/en/index.htm21519960USI0000 0001 2203 28612290642Q689617grid.29078.34
40https://ror.org/02s6k3f65University of BaselUniversité de bâleUniversität BaselUniversità di Basileahttps://www.unibas.ch/de215146000000 0004 1937 0642100008375427614Q372608grid.6612.3
41https://ror.org/02k7v4d05University of Bern (UB)Université de BerneUniversität BernUniversità di Bernahttp://www.unibe.ch/eng/21518340UB0000 0001 0726 51571000090681157515Q659080grid.5734.5
42https://ror.org/01qjrx392University of LiechtensteinUniversität Liechtensteinhttps://www.uni.li/study/de/128196100000 0001 2227 466810554064Q974328grid.445905.9
43https://ror.org/00kgrkn83University of Lucerne (UNILU)Université de lucerneUniversität LuzernUniversità di Lucernahttp://www.unilu.ch/21520000UNILU0000 0001 1456 793821004764Q673308grid.449852.6
44https://ror.org/0561a3s31University of St. Gallen (HSG)Université de saint-gallUniversität St. GallenUniversità di San Gallohttp://www.es.unisg.ch/en/21518980HSG0000 0001 2156 6618100009572751473Q673354grid.15775.31
45https://ror.org/040gs8e06Pädagogische Hochschule Wallis (PH-VS)Haute École Pédagogique du Valaishttp://www.hepvs.ch/de21520000PH-VS0000 0001 2178 3217grid.466216.1
-
- - - - -```python -# tri par nom -organization = organization.sort_values(by='name') -organization -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rornamelabel_enlabel_frlabel_delabel_itwebsitecountrystarting_yearis_funderacronymaliasesisnifundreforgrefwikidatagrid
28https://ror.org/04d8ztx87Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...215185000000 0004 4681 910XQ397466grid.417771.3
27https://ror.org/02bnkt322Bern University of Applied Sciences (BFH)Haute école spécialisée bernoiseBerner Fachhochschulehttp://www.bfh.ch/en/home.html21519970BFH0000 0001 0688 67795011000062594365265Q466455grid.424060.4
23https://ror.org/05a28rw58ETH Zurich (ETH Zurich)École Polytechnique Fédérale de ZurichEidgenössische Technische Hochschule ZürichPolitecnico federale di Zurigohttps://www.ethz.ch/en.html21518550ETH ZurichSwiss Federal Institute of Technology in Zuric...0000 0001 2156 2780501100003006210910Q11942grid.5801.c
26https://ror.org/01ggx4157European Organization for Nuclear Research (CERN)Organisation européenne pour la recherche nucl...Europäische Organisation für Kernforschunghttp://home.web.cern.ch/21519540CERN0000 0001 2156 142X37351Q42944grid.9132.9
2https://ror.org/0210tb741Forschungsinstitut für biologischen Landbau (F...https://www.fibl.org/en/germany/location-de.html2150FiBLgrid.506220.3
3https://ror.org/007ygn379Graduate Institute of International and Develo...Institut de Hautes études Internationales et d...Hochschulinstitut für internationale Studien u...http://graduateinstitute.ch/home.html21519270IHEIDGraduate Institute Geneva0000 0001 2296 987314744053Q691686grid.424404.2
5https://ror.org/015pmkr43Haute École Pédagogique BEJUNE (HEP BEJUNE)http://www.hep-bejune.ch/21520010HEP BEJUNE0000 0001 0658 3479grid.469449.2
6https://ror.org/048gre751Haute École Pédagogique Fribourg (HEP-PH FR)https://www.hepfr.ch/21519900HEP-PH FR0000 0001 0266 4909grid.469451.b
7https://ror.org/01bvm0h13Haute École Pédagogique du Canton de Vaud (HEP...http://www.hepl.ch/cms/accueil.html21520010HEP Vaud0000 0004 0613 4050grid.466224.0
11https://ror.org/049c2kr37Kalaidos University of Applied Sciences (Kalai...Kalaidos Fachhochschulehttps://www.kalaidos-fh.ch/de-CH21519950Kalaidos UAS0000 0004 0453 90546746630Q681372grid.449532.d
12https://ror.org/021f7p178Lib4RI - Library for the Research Institutes w...http://www.lib4ri.ch/21520110Lib4RI0000 0004 0624 8541Q1278450grid.458352.d
9https://ror.org/04nd0xd48Lucerne University of Applied Sciences and ArtsHaute École de lucerneHochschule Luzernhttps://www.hslu.ch/en/215199700000 0001 2191 894319480920Q664028grid.425064.1
13https://ror.org/00p9jf779Medicines for Malaria Venture (MMV)http://www.mmv.org/21519990MMV0000 0004 0432 5267501100004167Q6806774grid.452605.0
14https://ror.org/038mj2660Ostschweizer Fachhochschule OSTEastern Switzerland University of Applied Scie...https://www.ost.ch/21519990grid.510272.3
16https://ror.org/05jf1ma54Pädagogische Hochschule BernBern University of Teacher Educationhttps://www.phbern.ch21520050PHBern0000 0000 8585 5665grid.454333.6
17https://ror.org/02fjgft97Pädagogische Hochschule Graubünden (PHGR)Alta scuola pedagogica dei Grigionihttp://www.phgr.ch/2150PHGR0000 0000 9317 283Xgrid.469478.0
19https://ror.org/03fs41j10Pädagogische Hochschule Schaffhausen (PHSH)http://www.phsh.ch/21520030PHSH0000 0004 0450 7546grid.466133.5
22https://ror.org/04bf6dq94Pädagogische Hochschule Thurgau (PHTG)http://www.phtg.ch/home/21520030PHTG0000 0004 0613 3824grid.466322.7
45https://ror.org/040gs8e06Pädagogische Hochschule Wallis (PH-VS)Haute École Pédagogique du Valaishttp://www.hepvs.ch/de21520000PH-VS0000 0001 2178 3217grid.466216.1
20https://ror.org/00rqdn375Schwyz University of Teacher Education (PHSZ)Pädagogische Hochschule Schwyzhttps://www.phsz.ch/en/2150PHSZPHZ Schwyz0000 0004 0613 7454grid.466169.a
21https://ror.org/05m37v666St.Gallen University of Teacher Education (PHSG)Pädagogische Hochschule St. Gallenhttps://www.phsg.ch/en21520070PHSG0000 0001 0271 5139Q1768652grid.466208.e
25https://ror.org/00zg4za48Swiss Federal Institute for Vocational Educati...Institut Fédéral des Hautes Études en Formatio...Eidgenössisches Hochschulinstitut für Berufsbi...http://www.ehb-schweiz.ch/en/21520070SFIVET0000 0001 2285 5681Q1302632grid.466173.1
37https://ror.org/03mcsbr76Swiss Ornithological InstituteSchweizerische Vogelwartehttp://www.vogelwarte.ch/de/home/215192400000 0001 1512 3677Q663638grid.419767.a
39https://ror.org/03c4atk17Universita della Svizzera Italiana (USI)University of Italian SwitzerlandUniversité de la suisse italienneUniversità della Svizzera italianahttp://www.usi.ch/en/index.htm21519960USI0000 0001 2203 28612290642Q689617grid.29078.34
1https://ror.org/04mq2g308University of Applied Sciences and Arts Northw...http://www.fhnw.ch/homepage21520060FHNWFachhochschule Nordwestschweiz0000 0001 1497 8091grid.410380.e
4https://ror.org/01xkakk17University of Applied Sciences and Arts Wester...Haute École Spécialisée de Suisse OccidentaleFachhochschule Westschweizhttp://www.hes-so.ch/en/homepage-hes-so-1679.html21519980HES-SO0000 0001 0943 199910128956Q168003grid.5681.a
38https://ror.org/05ep8g269University of Applied Sciences and Arts of Sou...Scuola Universitaria Professionale della Svizz...http://www.supsi.ch/home_en.html21519970SUPSI000000012325223334066841Q663984grid.16058.3a
10https://ror.org/00w9q2c06University of Applied Sciences of Special Need...Interkantonale Hochschule für Heilpädagogikhttp://www.hfh.ch/en/21519240HfHZurich Training College for Teachers of Specia...0000 0001 0710 6332grid.466279.8
0https://ror.org/032ymzc07University of Applied Sciences of the GrisonsFachhochschule Graubündenhttps://www.fhgr.ch/en/21519630Hochschule für Technik und Wirtschaft Chur0000 0000 8718 2812Q1622220grid.460104.7
40https://ror.org/02s6k3f65University of BaselUniversité de bâleUniversität BaselUniversità di Basileahttps://www.unibas.ch/de215146000000 0004 1937 0642100008375427614Q372608grid.6612.3
41https://ror.org/02k7v4d05University of Bern (UB)Université de BerneUniversität BernUniversità di Bernahttp://www.unibe.ch/eng/21518340UB0000 0001 0726 51571000090681157515Q659080grid.5734.5
30https://ror.org/022fs9h90University of FribourgUniversité de FribourgUniversität FreiburgUniversità di Friburgohttp://www.unifr.ch/home/welcomeE.php215188900000 0004 0478 1713501100005869535267Q36188grid.8534.a
31https://ror.org/01swzsf04University of Geneva (UNIGE)Université de GenèveUniversità di Ginevrahttps://www.unige.ch/21515590UNIGESchola Genevensis0000 0001 2322 4988501100006389342348Q503473grid.8591.5
32https://ror.org/019whta54University of Lausanne (UNIL)Université de LausanneUniversität LausanneUniversità di Losannahttp://www.unil.ch/central/en/home.html21515370UNILSchola Lausannensis0000 0001 2165 420450110000639079810Q658975grid.9851.5
42https://ror.org/01qjrx392University of LiechtensteinUniversität Liechtensteinhttps://www.uni.li/study/de/128196100000 0001 2227 466810554064Q974328grid.445905.9
43https://ror.org/00kgrkn83University of Lucerne (UNILU)Université de lucerneUniversität LuzernUniversità di Lucernahttp://www.unilu.ch/21520000UNILU0000 0001 1456 793821004764Q673308grid.449852.6
33https://ror.org/00vasag41University of NeuchâtelUniversité de neuchâtelUniversität Neuenburghttp://www2.unine.ch/215183800000 0001 2297 77185011000053533662101Q541548grid.10711.36
44https://ror.org/0561a3s31University of St. Gallen (HSG)Université de saint-gallUniversität St. GallenUniversità di San Gallohttp://www.es.unisg.ch/en/21518980HSG0000 0001 2156 6618100009572751473Q673354grid.15775.31
18https://ror.org/0235ynq74University of Teacher Education LucernePädagogische Hochschule Luzernhttp://www.phlu.ch/ute-lucerne/21520030PH Luzern0000 0001 0348 1637grid.465965.d
36https://ror.org/05ghhx264University of Teacher Education Zug (PH Zug)Pädagogische Hochschule Zughttps://www.zg.ch/behoerden/direktion-fur-bild...21520130PH Zug0000 0004 0449 2225grid.466274.5
29https://ror.org/02crff812University of Zurich (UZH)Université de zurichUniversität ZürichUniversità di Zurigohttp://www.uzh.ch/index_en.html21518330UZH0000 0004 1937 0650501100006447314803Q206702grid.7400.3
35https://ror.org/05pmsvm27Zurich University of Applied Sciences (ZHAW)Zürcher Hochschule für Angewandte Wissenschaftenhttps://www.zhaw.ch/en/university/21520070ZHAW000000012229164430930550Q2605554grid.19739.35
8https://ror.org/02ejkey04Zurich University of Applied Sciences in Busin...Hochschule für Wirtschaft Zürichhttp://www.fh-hwz.ch/en21519860HWZ0000 0001 0008 371330805829Q1488771grid.449909.9
15https://ror.org/01awgk221Zurich University of Teacher Education (PHZH)Pädagogische Hochschule Zürichhttps://phzh.ch/en/21520020PHZHPH Zürich0000 0000 9666 1858grid.483054.e
34https://ror.org/05r0ap620Zurich University of the ArtsHaute École d'Art de ZurichZürcher Hochschule der Künstehttps://www.zhdk.ch/2152007039250592Q222450grid.449912.3
24https://ror.org/02s376052École Polytechnique Fédérale de Lausanne (EPFL)Swiss Federal Institute of Technology in Lausannehttp://www.epfl.ch/index.en.html21518530EPFL000000012183904950110000170371968Q262760grid.5333.6
-
- - - - -```python -organization = organization.reset_index(drop=True) -organization -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rornamelabel_enlabel_frlabel_delabel_itwebsitecountrystarting_yearis_funderacronymaliasesisnifundreforgrefwikidatagrid
0https://ror.org/04d8ztx87Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...215185000000 0004 4681 910XQ397466grid.417771.3
1https://ror.org/02bnkt322Bern University of Applied Sciences (BFH)Haute école spécialisée bernoiseBerner Fachhochschulehttp://www.bfh.ch/en/home.html21519970BFH0000 0001 0688 67795011000062594365265Q466455grid.424060.4
2https://ror.org/05a28rw58ETH Zurich (ETH Zurich)École Polytechnique Fédérale de ZurichEidgenössische Technische Hochschule ZürichPolitecnico federale di Zurigohttps://www.ethz.ch/en.html21518550ETH ZurichSwiss Federal Institute of Technology in Zuric...0000 0001 2156 2780501100003006210910Q11942grid.5801.c
3https://ror.org/01ggx4157European Organization for Nuclear Research (CERN)Organisation européenne pour la recherche nucl...Europäische Organisation für Kernforschunghttp://home.web.cern.ch/21519540CERN0000 0001 2156 142X37351Q42944grid.9132.9
4https://ror.org/0210tb741Forschungsinstitut für biologischen Landbau (F...https://www.fibl.org/en/germany/location-de.html2150FiBLgrid.506220.3
5https://ror.org/007ygn379Graduate Institute of International and Develo...Institut de Hautes études Internationales et d...Hochschulinstitut für internationale Studien u...http://graduateinstitute.ch/home.html21519270IHEIDGraduate Institute Geneva0000 0001 2296 987314744053Q691686grid.424404.2
6https://ror.org/015pmkr43Haute École Pédagogique BEJUNE (HEP BEJUNE)http://www.hep-bejune.ch/21520010HEP BEJUNE0000 0001 0658 3479grid.469449.2
7https://ror.org/048gre751Haute École Pédagogique Fribourg (HEP-PH FR)https://www.hepfr.ch/21519900HEP-PH FR0000 0001 0266 4909grid.469451.b
8https://ror.org/01bvm0h13Haute École Pédagogique du Canton de Vaud (HEP...http://www.hepl.ch/cms/accueil.html21520010HEP Vaud0000 0004 0613 4050grid.466224.0
9https://ror.org/049c2kr37Kalaidos University of Applied Sciences (Kalai...Kalaidos Fachhochschulehttps://www.kalaidos-fh.ch/de-CH21519950Kalaidos UAS0000 0004 0453 90546746630Q681372grid.449532.d
10https://ror.org/021f7p178Lib4RI - Library for the Research Institutes w...http://www.lib4ri.ch/21520110Lib4RI0000 0004 0624 8541Q1278450grid.458352.d
11https://ror.org/04nd0xd48Lucerne University of Applied Sciences and ArtsHaute École de lucerneHochschule Luzernhttps://www.hslu.ch/en/215199700000 0001 2191 894319480920Q664028grid.425064.1
12https://ror.org/00p9jf779Medicines for Malaria Venture (MMV)http://www.mmv.org/21519990MMV0000 0004 0432 5267501100004167Q6806774grid.452605.0
13https://ror.org/038mj2660Ostschweizer Fachhochschule OSTEastern Switzerland University of Applied Scie...https://www.ost.ch/21519990grid.510272.3
14https://ror.org/05jf1ma54Pädagogische Hochschule BernBern University of Teacher Educationhttps://www.phbern.ch21520050PHBern0000 0000 8585 5665grid.454333.6
15https://ror.org/02fjgft97Pädagogische Hochschule Graubünden (PHGR)Alta scuola pedagogica dei Grigionihttp://www.phgr.ch/2150PHGR0000 0000 9317 283Xgrid.469478.0
16https://ror.org/03fs41j10Pädagogische Hochschule Schaffhausen (PHSH)http://www.phsh.ch/21520030PHSH0000 0004 0450 7546grid.466133.5
17https://ror.org/04bf6dq94Pädagogische Hochschule Thurgau (PHTG)http://www.phtg.ch/home/21520030PHTG0000 0004 0613 3824grid.466322.7
18https://ror.org/040gs8e06Pädagogische Hochschule Wallis (PH-VS)Haute École Pédagogique du Valaishttp://www.hepvs.ch/de21520000PH-VS0000 0001 2178 3217grid.466216.1
19https://ror.org/00rqdn375Schwyz University of Teacher Education (PHSZ)Pädagogische Hochschule Schwyzhttps://www.phsz.ch/en/2150PHSZPHZ Schwyz0000 0004 0613 7454grid.466169.a
20https://ror.org/05m37v666St.Gallen University of Teacher Education (PHSG)Pädagogische Hochschule St. Gallenhttps://www.phsg.ch/en21520070PHSG0000 0001 0271 5139Q1768652grid.466208.e
21https://ror.org/00zg4za48Swiss Federal Institute for Vocational Educati...Institut Fédéral des Hautes Études en Formatio...Eidgenössisches Hochschulinstitut für Berufsbi...http://www.ehb-schweiz.ch/en/21520070SFIVET0000 0001 2285 5681Q1302632grid.466173.1
22https://ror.org/03mcsbr76Swiss Ornithological InstituteSchweizerische Vogelwartehttp://www.vogelwarte.ch/de/home/215192400000 0001 1512 3677Q663638grid.419767.a
23https://ror.org/03c4atk17Universita della Svizzera Italiana (USI)University of Italian SwitzerlandUniversité de la suisse italienneUniversità della Svizzera italianahttp://www.usi.ch/en/index.htm21519960USI0000 0001 2203 28612290642Q689617grid.29078.34
24https://ror.org/04mq2g308University of Applied Sciences and Arts Northw...http://www.fhnw.ch/homepage21520060FHNWFachhochschule Nordwestschweiz0000 0001 1497 8091grid.410380.e
25https://ror.org/01xkakk17University of Applied Sciences and Arts Wester...Haute École Spécialisée de Suisse OccidentaleFachhochschule Westschweizhttp://www.hes-so.ch/en/homepage-hes-so-1679.html21519980HES-SO0000 0001 0943 199910128956Q168003grid.5681.a
26https://ror.org/05ep8g269University of Applied Sciences and Arts of Sou...Scuola Universitaria Professionale della Svizz...http://www.supsi.ch/home_en.html21519970SUPSI000000012325223334066841Q663984grid.16058.3a
27https://ror.org/00w9q2c06University of Applied Sciences of Special Need...Interkantonale Hochschule für Heilpädagogikhttp://www.hfh.ch/en/21519240HfHZurich Training College for Teachers of Specia...0000 0001 0710 6332grid.466279.8
28https://ror.org/032ymzc07University of Applied Sciences of the GrisonsFachhochschule Graubündenhttps://www.fhgr.ch/en/21519630Hochschule für Technik und Wirtschaft Chur0000 0000 8718 2812Q1622220grid.460104.7
29https://ror.org/02s6k3f65University of BaselUniversité de bâleUniversität BaselUniversità di Basileahttps://www.unibas.ch/de215146000000 0004 1937 0642100008375427614Q372608grid.6612.3
30https://ror.org/02k7v4d05University of Bern (UB)Université de BerneUniversität BernUniversità di Bernahttp://www.unibe.ch/eng/21518340UB0000 0001 0726 51571000090681157515Q659080grid.5734.5
31https://ror.org/022fs9h90University of FribourgUniversité de FribourgUniversität FreiburgUniversità di Friburgohttp://www.unifr.ch/home/welcomeE.php215188900000 0004 0478 1713501100005869535267Q36188grid.8534.a
32https://ror.org/01swzsf04University of Geneva (UNIGE)Université de GenèveUniversità di Ginevrahttps://www.unige.ch/21515590UNIGESchola Genevensis0000 0001 2322 4988501100006389342348Q503473grid.8591.5
33https://ror.org/019whta54University of Lausanne (UNIL)Université de LausanneUniversität LausanneUniversità di Losannahttp://www.unil.ch/central/en/home.html21515370UNILSchola Lausannensis0000 0001 2165 420450110000639079810Q658975grid.9851.5
34https://ror.org/01qjrx392University of LiechtensteinUniversität Liechtensteinhttps://www.uni.li/study/de/128196100000 0001 2227 466810554064Q974328grid.445905.9
35https://ror.org/00kgrkn83University of Lucerne (UNILU)Université de lucerneUniversität LuzernUniversità di Lucernahttp://www.unilu.ch/21520000UNILU0000 0001 1456 793821004764Q673308grid.449852.6
36https://ror.org/00vasag41University of NeuchâtelUniversité de neuchâtelUniversität Neuenburghttp://www2.unine.ch/215183800000 0001 2297 77185011000053533662101Q541548grid.10711.36
37https://ror.org/0561a3s31University of St. Gallen (HSG)Université de saint-gallUniversität St. GallenUniversità di San Gallohttp://www.es.unisg.ch/en/21518980HSG0000 0001 2156 6618100009572751473Q673354grid.15775.31
38https://ror.org/0235ynq74University of Teacher Education LucernePädagogische Hochschule Luzernhttp://www.phlu.ch/ute-lucerne/21520030PH Luzern0000 0001 0348 1637grid.465965.d
39https://ror.org/05ghhx264University of Teacher Education Zug (PH Zug)Pädagogische Hochschule Zughttps://www.zg.ch/behoerden/direktion-fur-bild...21520130PH Zug0000 0004 0449 2225grid.466274.5
40https://ror.org/02crff812University of Zurich (UZH)Université de zurichUniversität ZürichUniversità di Zurigohttp://www.uzh.ch/index_en.html21518330UZH0000 0004 1937 0650501100006447314803Q206702grid.7400.3
41https://ror.org/05pmsvm27Zurich University of Applied Sciences (ZHAW)Zürcher Hochschule für Angewandte Wissenschaftenhttps://www.zhaw.ch/en/university/21520070ZHAW000000012229164430930550Q2605554grid.19739.35
42https://ror.org/02ejkey04Zurich University of Applied Sciences in Busin...Hochschule für Wirtschaft Zürichhttp://www.fh-hwz.ch/en21519860HWZ0000 0001 0008 371330805829Q1488771grid.449909.9
43https://ror.org/01awgk221Zurich University of Teacher Education (PHZH)Pädagogische Hochschule Zürichhttps://phzh.ch/en/21520020PHZHPH Zürich0000 0000 9666 1858grid.483054.e
44https://ror.org/05r0ap620Zurich University of the ArtsHaute École d'Art de ZurichZürcher Hochschule der Künstehttps://www.zhdk.ch/2152007039250592Q222450grid.449912.3
45https://ror.org/02s376052École Polytechnique Fédérale de Lausanne (EPFL)Swiss Federal Institute of Technology in Lausannehttp://www.epfl.ch/index.en.html21518530EPFL000000012183904950110000170371968Q262760grid.5333.6
-
- - - - -```python -# mettre l'EPFL en position 1 et UNIGE en 2 -target_row = 32 -# Move target row to first element of list. -idx = [target_row] + [i for i in range(len(organization)) if i != target_row] -organization = organization.iloc[idx] -organization -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rornamelabel_enlabel_frlabel_delabel_itwebsitecountrystarting_yearis_funderacronymaliasesisnifundreforgrefwikidatagrid
32https://ror.org/01swzsf04University of Geneva (UNIGE)Université de GenèveUniversità di Ginevrahttps://www.unige.ch/21515590UNIGESchola Genevensis0000 0001 2322 4988501100006389342348Q503473grid.8591.5
0https://ror.org/04d8ztx87Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...215185000000 0004 4681 910XQ397466grid.417771.3
1https://ror.org/02bnkt322Bern University of Applied Sciences (BFH)Haute école spécialisée bernoiseBerner Fachhochschulehttp://www.bfh.ch/en/home.html21519970BFH0000 0001 0688 67795011000062594365265Q466455grid.424060.4
2https://ror.org/05a28rw58ETH Zurich (ETH Zurich)École Polytechnique Fédérale de ZurichEidgenössische Technische Hochschule ZürichPolitecnico federale di Zurigohttps://www.ethz.ch/en.html21518550ETH ZurichSwiss Federal Institute of Technology in Zuric...0000 0001 2156 2780501100003006210910Q11942grid.5801.c
3https://ror.org/01ggx4157European Organization for Nuclear Research (CERN)Organisation européenne pour la recherche nucl...Europäische Organisation für Kernforschunghttp://home.web.cern.ch/21519540CERN0000 0001 2156 142X37351Q42944grid.9132.9
4https://ror.org/0210tb741Forschungsinstitut für biologischen Landbau (F...https://www.fibl.org/en/germany/location-de.html2150FiBLgrid.506220.3
5https://ror.org/007ygn379Graduate Institute of International and Develo...Institut de Hautes études Internationales et d...Hochschulinstitut für internationale Studien u...http://graduateinstitute.ch/home.html21519270IHEIDGraduate Institute Geneva0000 0001 2296 987314744053Q691686grid.424404.2
6https://ror.org/015pmkr43Haute École Pédagogique BEJUNE (HEP BEJUNE)http://www.hep-bejune.ch/21520010HEP BEJUNE0000 0001 0658 3479grid.469449.2
7https://ror.org/048gre751Haute École Pédagogique Fribourg (HEP-PH FR)https://www.hepfr.ch/21519900HEP-PH FR0000 0001 0266 4909grid.469451.b
8https://ror.org/01bvm0h13Haute École Pédagogique du Canton de Vaud (HEP...http://www.hepl.ch/cms/accueil.html21520010HEP Vaud0000 0004 0613 4050grid.466224.0
9https://ror.org/049c2kr37Kalaidos University of Applied Sciences (Kalai...Kalaidos Fachhochschulehttps://www.kalaidos-fh.ch/de-CH21519950Kalaidos UAS0000 0004 0453 90546746630Q681372grid.449532.d
10https://ror.org/021f7p178Lib4RI - Library for the Research Institutes w...http://www.lib4ri.ch/21520110Lib4RI0000 0004 0624 8541Q1278450grid.458352.d
11https://ror.org/04nd0xd48Lucerne University of Applied Sciences and ArtsHaute École de lucerneHochschule Luzernhttps://www.hslu.ch/en/215199700000 0001 2191 894319480920Q664028grid.425064.1
12https://ror.org/00p9jf779Medicines for Malaria Venture (MMV)http://www.mmv.org/21519990MMV0000 0004 0432 5267501100004167Q6806774grid.452605.0
13https://ror.org/038mj2660Ostschweizer Fachhochschule OSTEastern Switzerland University of Applied Scie...https://www.ost.ch/21519990grid.510272.3
14https://ror.org/05jf1ma54Pädagogische Hochschule BernBern University of Teacher Educationhttps://www.phbern.ch21520050PHBern0000 0000 8585 5665grid.454333.6
15https://ror.org/02fjgft97Pädagogische Hochschule Graubünden (PHGR)Alta scuola pedagogica dei Grigionihttp://www.phgr.ch/2150PHGR0000 0000 9317 283Xgrid.469478.0
16https://ror.org/03fs41j10Pädagogische Hochschule Schaffhausen (PHSH)http://www.phsh.ch/21520030PHSH0000 0004 0450 7546grid.466133.5
17https://ror.org/04bf6dq94Pädagogische Hochschule Thurgau (PHTG)http://www.phtg.ch/home/21520030PHTG0000 0004 0613 3824grid.466322.7
18https://ror.org/040gs8e06Pädagogische Hochschule Wallis (PH-VS)Haute École Pédagogique du Valaishttp://www.hepvs.ch/de21520000PH-VS0000 0001 2178 3217grid.466216.1
19https://ror.org/00rqdn375Schwyz University of Teacher Education (PHSZ)Pädagogische Hochschule Schwyzhttps://www.phsz.ch/en/2150PHSZPHZ Schwyz0000 0004 0613 7454grid.466169.a
20https://ror.org/05m37v666St.Gallen University of Teacher Education (PHSG)Pädagogische Hochschule St. Gallenhttps://www.phsg.ch/en21520070PHSG0000 0001 0271 5139Q1768652grid.466208.e
21https://ror.org/00zg4za48Swiss Federal Institute for Vocational Educati...Institut Fédéral des Hautes Études en Formatio...Eidgenössisches Hochschulinstitut für Berufsbi...http://www.ehb-schweiz.ch/en/21520070SFIVET0000 0001 2285 5681Q1302632grid.466173.1
22https://ror.org/03mcsbr76Swiss Ornithological InstituteSchweizerische Vogelwartehttp://www.vogelwarte.ch/de/home/215192400000 0001 1512 3677Q663638grid.419767.a
23https://ror.org/03c4atk17Universita della Svizzera Italiana (USI)University of Italian SwitzerlandUniversité de la suisse italienneUniversità della Svizzera italianahttp://www.usi.ch/en/index.htm21519960USI0000 0001 2203 28612290642Q689617grid.29078.34
24https://ror.org/04mq2g308University of Applied Sciences and Arts Northw...http://www.fhnw.ch/homepage21520060FHNWFachhochschule Nordwestschweiz0000 0001 1497 8091grid.410380.e
25https://ror.org/01xkakk17University of Applied Sciences and Arts Wester...Haute École Spécialisée de Suisse OccidentaleFachhochschule Westschweizhttp://www.hes-so.ch/en/homepage-hes-so-1679.html21519980HES-SO0000 0001 0943 199910128956Q168003grid.5681.a
26https://ror.org/05ep8g269University of Applied Sciences and Arts of Sou...Scuola Universitaria Professionale della Svizz...http://www.supsi.ch/home_en.html21519970SUPSI000000012325223334066841Q663984grid.16058.3a
27https://ror.org/00w9q2c06University of Applied Sciences of Special Need...Interkantonale Hochschule für Heilpädagogikhttp://www.hfh.ch/en/21519240HfHZurich Training College for Teachers of Specia...0000 0001 0710 6332grid.466279.8
28https://ror.org/032ymzc07University of Applied Sciences of the GrisonsFachhochschule Graubündenhttps://www.fhgr.ch/en/21519630Hochschule für Technik und Wirtschaft Chur0000 0000 8718 2812Q1622220grid.460104.7
29https://ror.org/02s6k3f65University of BaselUniversité de bâleUniversität BaselUniversità di Basileahttps://www.unibas.ch/de215146000000 0004 1937 0642100008375427614Q372608grid.6612.3
30https://ror.org/02k7v4d05University of Bern (UB)Université de BerneUniversität BernUniversità di Bernahttp://www.unibe.ch/eng/21518340UB0000 0001 0726 51571000090681157515Q659080grid.5734.5
31https://ror.org/022fs9h90University of FribourgUniversité de FribourgUniversität FreiburgUniversità di Friburgohttp://www.unifr.ch/home/welcomeE.php215188900000 0004 0478 1713501100005869535267Q36188grid.8534.a
33https://ror.org/019whta54University of Lausanne (UNIL)Université de LausanneUniversität LausanneUniversità di Losannahttp://www.unil.ch/central/en/home.html21515370UNILSchola Lausannensis0000 0001 2165 420450110000639079810Q658975grid.9851.5
34https://ror.org/01qjrx392University of LiechtensteinUniversität Liechtensteinhttps://www.uni.li/study/de/128196100000 0001 2227 466810554064Q974328grid.445905.9
35https://ror.org/00kgrkn83University of Lucerne (UNILU)Université de lucerneUniversität LuzernUniversità di Lucernahttp://www.unilu.ch/21520000UNILU0000 0001 1456 793821004764Q673308grid.449852.6
36https://ror.org/00vasag41University of NeuchâtelUniversité de neuchâtelUniversität Neuenburghttp://www2.unine.ch/215183800000 0001 2297 77185011000053533662101Q541548grid.10711.36
37https://ror.org/0561a3s31University of St. Gallen (HSG)Université de saint-gallUniversität St. GallenUniversità di San Gallohttp://www.es.unisg.ch/en/21518980HSG0000 0001 2156 6618100009572751473Q673354grid.15775.31
38https://ror.org/0235ynq74University of Teacher Education LucernePädagogische Hochschule Luzernhttp://www.phlu.ch/ute-lucerne/21520030PH Luzern0000 0001 0348 1637grid.465965.d
39https://ror.org/05ghhx264University of Teacher Education Zug (PH Zug)Pädagogische Hochschule Zughttps://www.zg.ch/behoerden/direktion-fur-bild...21520130PH Zug0000 0004 0449 2225grid.466274.5
40https://ror.org/02crff812University of Zurich (UZH)Université de zurichUniversität ZürichUniversità di Zurigohttp://www.uzh.ch/index_en.html21518330UZH0000 0004 1937 0650501100006447314803Q206702grid.7400.3
41https://ror.org/05pmsvm27Zurich University of Applied Sciences (ZHAW)Zürcher Hochschule für Angewandte Wissenschaftenhttps://www.zhaw.ch/en/university/21520070ZHAW000000012229164430930550Q2605554grid.19739.35
42https://ror.org/02ejkey04Zurich University of Applied Sciences in Busin...Hochschule für Wirtschaft Zürichhttp://www.fh-hwz.ch/en21519860HWZ0000 0001 0008 371330805829Q1488771grid.449909.9
43https://ror.org/01awgk221Zurich University of Teacher Education (PHZH)Pädagogische Hochschule Zürichhttps://phzh.ch/en/21520020PHZHPH Zürich0000 0000 9666 1858grid.483054.e
44https://ror.org/05r0ap620Zurich University of the ArtsHaute École d'Art de ZurichZürcher Hochschule der Künstehttps://www.zhdk.ch/2152007039250592Q222450grid.449912.3
45https://ror.org/02s376052École Polytechnique Fédérale de Lausanne (EPFL)Swiss Federal Institute of Technology in Lausannehttp://www.epfl.ch/index.en.html21518530EPFL000000012183904950110000170371968Q262760grid.5333.6
-
- - - - -```python -organization = organization.reset_index(drop=True) -organization -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rornamelabel_enlabel_frlabel_delabel_itwebsitecountrystarting_yearis_funderacronymaliasesisnifundreforgrefwikidatagrid
0https://ror.org/01swzsf04University of Geneva (UNIGE)Université de GenèveUniversità di Ginevrahttps://www.unige.ch/21515590UNIGESchola Genevensis0000 0001 2322 4988501100006389342348Q503473grid.8591.5
1https://ror.org/04d8ztx87Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...215185000000 0004 4681 910XQ397466grid.417771.3
2https://ror.org/02bnkt322Bern University of Applied Sciences (BFH)Haute école spécialisée bernoiseBerner Fachhochschulehttp://www.bfh.ch/en/home.html21519970BFH0000 0001 0688 67795011000062594365265Q466455grid.424060.4
3https://ror.org/05a28rw58ETH Zurich (ETH Zurich)École Polytechnique Fédérale de ZurichEidgenössische Technische Hochschule ZürichPolitecnico federale di Zurigohttps://www.ethz.ch/en.html21518550ETH ZurichSwiss Federal Institute of Technology in Zuric...0000 0001 2156 2780501100003006210910Q11942grid.5801.c
4https://ror.org/01ggx4157European Organization for Nuclear Research (CERN)Organisation européenne pour la recherche nucl...Europäische Organisation für Kernforschunghttp://home.web.cern.ch/21519540CERN0000 0001 2156 142X37351Q42944grid.9132.9
5https://ror.org/0210tb741Forschungsinstitut für biologischen Landbau (F...https://www.fibl.org/en/germany/location-de.html2150FiBLgrid.506220.3
6https://ror.org/007ygn379Graduate Institute of International and Develo...Institut de Hautes études Internationales et d...Hochschulinstitut für internationale Studien u...http://graduateinstitute.ch/home.html21519270IHEIDGraduate Institute Geneva0000 0001 2296 987314744053Q691686grid.424404.2
7https://ror.org/015pmkr43Haute École Pédagogique BEJUNE (HEP BEJUNE)http://www.hep-bejune.ch/21520010HEP BEJUNE0000 0001 0658 3479grid.469449.2
8https://ror.org/048gre751Haute École Pédagogique Fribourg (HEP-PH FR)https://www.hepfr.ch/21519900HEP-PH FR0000 0001 0266 4909grid.469451.b
9https://ror.org/01bvm0h13Haute École Pédagogique du Canton de Vaud (HEP...http://www.hepl.ch/cms/accueil.html21520010HEP Vaud0000 0004 0613 4050grid.466224.0
10https://ror.org/049c2kr37Kalaidos University of Applied Sciences (Kalai...Kalaidos Fachhochschulehttps://www.kalaidos-fh.ch/de-CH21519950Kalaidos UAS0000 0004 0453 90546746630Q681372grid.449532.d
11https://ror.org/021f7p178Lib4RI - Library for the Research Institutes w...http://www.lib4ri.ch/21520110Lib4RI0000 0004 0624 8541Q1278450grid.458352.d
12https://ror.org/04nd0xd48Lucerne University of Applied Sciences and ArtsHaute École de lucerneHochschule Luzernhttps://www.hslu.ch/en/215199700000 0001 2191 894319480920Q664028grid.425064.1
13https://ror.org/00p9jf779Medicines for Malaria Venture (MMV)http://www.mmv.org/21519990MMV0000 0004 0432 5267501100004167Q6806774grid.452605.0
14https://ror.org/038mj2660Ostschweizer Fachhochschule OSTEastern Switzerland University of Applied Scie...https://www.ost.ch/21519990grid.510272.3
15https://ror.org/05jf1ma54Pädagogische Hochschule BernBern University of Teacher Educationhttps://www.phbern.ch21520050PHBern0000 0000 8585 5665grid.454333.6
16https://ror.org/02fjgft97Pädagogische Hochschule Graubünden (PHGR)Alta scuola pedagogica dei Grigionihttp://www.phgr.ch/2150PHGR0000 0000 9317 283Xgrid.469478.0
17https://ror.org/03fs41j10Pädagogische Hochschule Schaffhausen (PHSH)http://www.phsh.ch/21520030PHSH0000 0004 0450 7546grid.466133.5
18https://ror.org/04bf6dq94Pädagogische Hochschule Thurgau (PHTG)http://www.phtg.ch/home/21520030PHTG0000 0004 0613 3824grid.466322.7
19https://ror.org/040gs8e06Pädagogische Hochschule Wallis (PH-VS)Haute École Pédagogique du Valaishttp://www.hepvs.ch/de21520000PH-VS0000 0001 2178 3217grid.466216.1
20https://ror.org/00rqdn375Schwyz University of Teacher Education (PHSZ)Pädagogische Hochschule Schwyzhttps://www.phsz.ch/en/2150PHSZPHZ Schwyz0000 0004 0613 7454grid.466169.a
21https://ror.org/05m37v666St.Gallen University of Teacher Education (PHSG)Pädagogische Hochschule St. Gallenhttps://www.phsg.ch/en21520070PHSG0000 0001 0271 5139Q1768652grid.466208.e
22https://ror.org/00zg4za48Swiss Federal Institute for Vocational Educati...Institut Fédéral des Hautes Études en Formatio...Eidgenössisches Hochschulinstitut für Berufsbi...http://www.ehb-schweiz.ch/en/21520070SFIVET0000 0001 2285 5681Q1302632grid.466173.1
23https://ror.org/03mcsbr76Swiss Ornithological InstituteSchweizerische Vogelwartehttp://www.vogelwarte.ch/de/home/215192400000 0001 1512 3677Q663638grid.419767.a
24https://ror.org/03c4atk17Universita della Svizzera Italiana (USI)University of Italian SwitzerlandUniversité de la suisse italienneUniversità della Svizzera italianahttp://www.usi.ch/en/index.htm21519960USI0000 0001 2203 28612290642Q689617grid.29078.34
25https://ror.org/04mq2g308University of Applied Sciences and Arts Northw...http://www.fhnw.ch/homepage21520060FHNWFachhochschule Nordwestschweiz0000 0001 1497 8091grid.410380.e
26https://ror.org/01xkakk17University of Applied Sciences and Arts Wester...Haute École Spécialisée de Suisse OccidentaleFachhochschule Westschweizhttp://www.hes-so.ch/en/homepage-hes-so-1679.html21519980HES-SO0000 0001 0943 199910128956Q168003grid.5681.a
27https://ror.org/05ep8g269University of Applied Sciences and Arts of Sou...Scuola Universitaria Professionale della Svizz...http://www.supsi.ch/home_en.html21519970SUPSI000000012325223334066841Q663984grid.16058.3a
28https://ror.org/00w9q2c06University of Applied Sciences of Special Need...Interkantonale Hochschule für Heilpädagogikhttp://www.hfh.ch/en/21519240HfHZurich Training College for Teachers of Specia...0000 0001 0710 6332grid.466279.8
29https://ror.org/032ymzc07University of Applied Sciences of the GrisonsFachhochschule Graubündenhttps://www.fhgr.ch/en/21519630Hochschule für Technik und Wirtschaft Chur0000 0000 8718 2812Q1622220grid.460104.7
30https://ror.org/02s6k3f65University of BaselUniversité de bâleUniversität BaselUniversità di Basileahttps://www.unibas.ch/de215146000000 0004 1937 0642100008375427614Q372608grid.6612.3
31https://ror.org/02k7v4d05University of Bern (UB)Université de BerneUniversität BernUniversità di Bernahttp://www.unibe.ch/eng/21518340UB0000 0001 0726 51571000090681157515Q659080grid.5734.5
32https://ror.org/022fs9h90University of FribourgUniversité de FribourgUniversität FreiburgUniversità di Friburgohttp://www.unifr.ch/home/welcomeE.php215188900000 0004 0478 1713501100005869535267Q36188grid.8534.a
33https://ror.org/019whta54University of Lausanne (UNIL)Université de LausanneUniversität LausanneUniversità di Losannahttp://www.unil.ch/central/en/home.html21515370UNILSchola Lausannensis0000 0001 2165 420450110000639079810Q658975grid.9851.5
34https://ror.org/01qjrx392University of LiechtensteinUniversität Liechtensteinhttps://www.uni.li/study/de/128196100000 0001 2227 466810554064Q974328grid.445905.9
35https://ror.org/00kgrkn83University of Lucerne (UNILU)Université de lucerneUniversität LuzernUniversità di Lucernahttp://www.unilu.ch/21520000UNILU0000 0001 1456 793821004764Q673308grid.449852.6
36https://ror.org/00vasag41University of NeuchâtelUniversité de neuchâtelUniversität Neuenburghttp://www2.unine.ch/215183800000 0001 2297 77185011000053533662101Q541548grid.10711.36
37https://ror.org/0561a3s31University of St. Gallen (HSG)Université de saint-gallUniversität St. GallenUniversità di San Gallohttp://www.es.unisg.ch/en/21518980HSG0000 0001 2156 6618100009572751473Q673354grid.15775.31
38https://ror.org/0235ynq74University of Teacher Education LucernePädagogische Hochschule Luzernhttp://www.phlu.ch/ute-lucerne/21520030PH Luzern0000 0001 0348 1637grid.465965.d
39https://ror.org/05ghhx264University of Teacher Education Zug (PH Zug)Pädagogische Hochschule Zughttps://www.zg.ch/behoerden/direktion-fur-bild...21520130PH Zug0000 0004 0449 2225grid.466274.5
40https://ror.org/02crff812University of Zurich (UZH)Université de zurichUniversität ZürichUniversità di Zurigohttp://www.uzh.ch/index_en.html21518330UZH0000 0004 1937 0650501100006447314803Q206702grid.7400.3
41https://ror.org/05pmsvm27Zurich University of Applied Sciences (ZHAW)Zürcher Hochschule für Angewandte Wissenschaftenhttps://www.zhaw.ch/en/university/21520070ZHAW000000012229164430930550Q2605554grid.19739.35
42https://ror.org/02ejkey04Zurich University of Applied Sciences in Busin...Hochschule für Wirtschaft Zürichhttp://www.fh-hwz.ch/en21519860HWZ0000 0001 0008 371330805829Q1488771grid.449909.9
43https://ror.org/01awgk221Zurich University of Teacher Education (PHZH)Pädagogische Hochschule Zürichhttps://phzh.ch/en/21520020PHZHPH Zürich0000 0000 9666 1858grid.483054.e
44https://ror.org/05r0ap620Zurich University of the ArtsHaute École d'Art de ZurichZürcher Hochschule der Künstehttps://www.zhdk.ch/2152007039250592Q222450grid.449912.3
45https://ror.org/02s376052École Polytechnique Fédérale de Lausanne (EPFL)Swiss Federal Institute of Technology in Lausannehttp://www.epfl.ch/index.en.html21518530EPFL000000012183904950110000170371968Q262760grid.5333.6
-
- - - - -```python -# mettre l'EPFL en position 1 et UNIGE en 2 -target_row = 45 -# Move target row to first element of list. -idx = [target_row] + [i for i in range(len(organization)) if i != target_row] -organization = organization.iloc[idx] -organization -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rornamelabel_enlabel_frlabel_delabel_itwebsitecountrystarting_yearis_funderacronymaliasesisnifundreforgrefwikidatagrid
45https://ror.org/02s376052École Polytechnique Fédérale de Lausanne (EPFL)Swiss Federal Institute of Technology in Lausannehttp://www.epfl.ch/index.en.html21518530EPFL000000012183904950110000170371968Q262760grid.5333.6
0https://ror.org/01swzsf04University of Geneva (UNIGE)Université de GenèveUniversità di Ginevrahttps://www.unige.ch/21515590UNIGESchola Genevensis0000 0001 2322 4988501100006389342348Q503473grid.8591.5
1https://ror.org/04d8ztx87Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...215185000000 0004 4681 910XQ397466grid.417771.3
2https://ror.org/02bnkt322Bern University of Applied Sciences (BFH)Haute école spécialisée bernoiseBerner Fachhochschulehttp://www.bfh.ch/en/home.html21519970BFH0000 0001 0688 67795011000062594365265Q466455grid.424060.4
3https://ror.org/05a28rw58ETH Zurich (ETH Zurich)École Polytechnique Fédérale de ZurichEidgenössische Technische Hochschule ZürichPolitecnico federale di Zurigohttps://www.ethz.ch/en.html21518550ETH ZurichSwiss Federal Institute of Technology in Zuric...0000 0001 2156 2780501100003006210910Q11942grid.5801.c
4https://ror.org/01ggx4157European Organization for Nuclear Research (CERN)Organisation européenne pour la recherche nucl...Europäische Organisation für Kernforschunghttp://home.web.cern.ch/21519540CERN0000 0001 2156 142X37351Q42944grid.9132.9
5https://ror.org/0210tb741Forschungsinstitut für biologischen Landbau (F...https://www.fibl.org/en/germany/location-de.html2150FiBLgrid.506220.3
6https://ror.org/007ygn379Graduate Institute of International and Develo...Institut de Hautes études Internationales et d...Hochschulinstitut für internationale Studien u...http://graduateinstitute.ch/home.html21519270IHEIDGraduate Institute Geneva0000 0001 2296 987314744053Q691686grid.424404.2
7https://ror.org/015pmkr43Haute École Pédagogique BEJUNE (HEP BEJUNE)http://www.hep-bejune.ch/21520010HEP BEJUNE0000 0001 0658 3479grid.469449.2
8https://ror.org/048gre751Haute École Pédagogique Fribourg (HEP-PH FR)https://www.hepfr.ch/21519900HEP-PH FR0000 0001 0266 4909grid.469451.b
9https://ror.org/01bvm0h13Haute École Pédagogique du Canton de Vaud (HEP...http://www.hepl.ch/cms/accueil.html21520010HEP Vaud0000 0004 0613 4050grid.466224.0
10https://ror.org/049c2kr37Kalaidos University of Applied Sciences (Kalai...Kalaidos Fachhochschulehttps://www.kalaidos-fh.ch/de-CH21519950Kalaidos UAS0000 0004 0453 90546746630Q681372grid.449532.d
11https://ror.org/021f7p178Lib4RI - Library for the Research Institutes w...http://www.lib4ri.ch/21520110Lib4RI0000 0004 0624 8541Q1278450grid.458352.d
12https://ror.org/04nd0xd48Lucerne University of Applied Sciences and ArtsHaute École de lucerneHochschule Luzernhttps://www.hslu.ch/en/215199700000 0001 2191 894319480920Q664028grid.425064.1
13https://ror.org/00p9jf779Medicines for Malaria Venture (MMV)http://www.mmv.org/21519990MMV0000 0004 0432 5267501100004167Q6806774grid.452605.0
14https://ror.org/038mj2660Ostschweizer Fachhochschule OSTEastern Switzerland University of Applied Scie...https://www.ost.ch/21519990grid.510272.3
15https://ror.org/05jf1ma54Pädagogische Hochschule BernBern University of Teacher Educationhttps://www.phbern.ch21520050PHBern0000 0000 8585 5665grid.454333.6
16https://ror.org/02fjgft97Pädagogische Hochschule Graubünden (PHGR)Alta scuola pedagogica dei Grigionihttp://www.phgr.ch/2150PHGR0000 0000 9317 283Xgrid.469478.0
17https://ror.org/03fs41j10Pädagogische Hochschule Schaffhausen (PHSH)http://www.phsh.ch/21520030PHSH0000 0004 0450 7546grid.466133.5
18https://ror.org/04bf6dq94Pädagogische Hochschule Thurgau (PHTG)http://www.phtg.ch/home/21520030PHTG0000 0004 0613 3824grid.466322.7
19https://ror.org/040gs8e06Pädagogische Hochschule Wallis (PH-VS)Haute École Pédagogique du Valaishttp://www.hepvs.ch/de21520000PH-VS0000 0001 2178 3217grid.466216.1
20https://ror.org/00rqdn375Schwyz University of Teacher Education (PHSZ)Pädagogische Hochschule Schwyzhttps://www.phsz.ch/en/2150PHSZPHZ Schwyz0000 0004 0613 7454grid.466169.a
21https://ror.org/05m37v666St.Gallen University of Teacher Education (PHSG)Pädagogische Hochschule St. Gallenhttps://www.phsg.ch/en21520070PHSG0000 0001 0271 5139Q1768652grid.466208.e
22https://ror.org/00zg4za48Swiss Federal Institute for Vocational Educati...Institut Fédéral des Hautes Études en Formatio...Eidgenössisches Hochschulinstitut für Berufsbi...http://www.ehb-schweiz.ch/en/21520070SFIVET0000 0001 2285 5681Q1302632grid.466173.1
23https://ror.org/03mcsbr76Swiss Ornithological InstituteSchweizerische Vogelwartehttp://www.vogelwarte.ch/de/home/215192400000 0001 1512 3677Q663638grid.419767.a
24https://ror.org/03c4atk17Universita della Svizzera Italiana (USI)University of Italian SwitzerlandUniversité de la suisse italienneUniversità della Svizzera italianahttp://www.usi.ch/en/index.htm21519960USI0000 0001 2203 28612290642Q689617grid.29078.34
25https://ror.org/04mq2g308University of Applied Sciences and Arts Northw...http://www.fhnw.ch/homepage21520060FHNWFachhochschule Nordwestschweiz0000 0001 1497 8091grid.410380.e
26https://ror.org/01xkakk17University of Applied Sciences and Arts Wester...Haute École Spécialisée de Suisse OccidentaleFachhochschule Westschweizhttp://www.hes-so.ch/en/homepage-hes-so-1679.html21519980HES-SO0000 0001 0943 199910128956Q168003grid.5681.a
27https://ror.org/05ep8g269University of Applied Sciences and Arts of Sou...Scuola Universitaria Professionale della Svizz...http://www.supsi.ch/home_en.html21519970SUPSI000000012325223334066841Q663984grid.16058.3a
28https://ror.org/00w9q2c06University of Applied Sciences of Special Need...Interkantonale Hochschule für Heilpädagogikhttp://www.hfh.ch/en/21519240HfHZurich Training College for Teachers of Specia...0000 0001 0710 6332grid.466279.8
29https://ror.org/032ymzc07University of Applied Sciences of the GrisonsFachhochschule Graubündenhttps://www.fhgr.ch/en/21519630Hochschule für Technik und Wirtschaft Chur0000 0000 8718 2812Q1622220grid.460104.7
30https://ror.org/02s6k3f65University of BaselUniversité de bâleUniversität BaselUniversità di Basileahttps://www.unibas.ch/de215146000000 0004 1937 0642100008375427614Q372608grid.6612.3
31https://ror.org/02k7v4d05University of Bern (UB)Université de BerneUniversität BernUniversità di Bernahttp://www.unibe.ch/eng/21518340UB0000 0001 0726 51571000090681157515Q659080grid.5734.5
32https://ror.org/022fs9h90University of FribourgUniversité de FribourgUniversität FreiburgUniversità di Friburgohttp://www.unifr.ch/home/welcomeE.php215188900000 0004 0478 1713501100005869535267Q36188grid.8534.a
33https://ror.org/019whta54University of Lausanne (UNIL)Université de LausanneUniversität LausanneUniversità di Losannahttp://www.unil.ch/central/en/home.html21515370UNILSchola Lausannensis0000 0001 2165 420450110000639079810Q658975grid.9851.5
34https://ror.org/01qjrx392University of LiechtensteinUniversität Liechtensteinhttps://www.uni.li/study/de/128196100000 0001 2227 466810554064Q974328grid.445905.9
35https://ror.org/00kgrkn83University of Lucerne (UNILU)Université de lucerneUniversität LuzernUniversità di Lucernahttp://www.unilu.ch/21520000UNILU0000 0001 1456 793821004764Q673308grid.449852.6
36https://ror.org/00vasag41University of NeuchâtelUniversité de neuchâtelUniversität Neuenburghttp://www2.unine.ch/215183800000 0001 2297 77185011000053533662101Q541548grid.10711.36
37https://ror.org/0561a3s31University of St. Gallen (HSG)Université de saint-gallUniversität St. GallenUniversità di San Gallohttp://www.es.unisg.ch/en/21518980HSG0000 0001 2156 6618100009572751473Q673354grid.15775.31
38https://ror.org/0235ynq74University of Teacher Education LucernePädagogische Hochschule Luzernhttp://www.phlu.ch/ute-lucerne/21520030PH Luzern0000 0001 0348 1637grid.465965.d
39https://ror.org/05ghhx264University of Teacher Education Zug (PH Zug)Pädagogische Hochschule Zughttps://www.zg.ch/behoerden/direktion-fur-bild...21520130PH Zug0000 0004 0449 2225grid.466274.5
40https://ror.org/02crff812University of Zurich (UZH)Université de zurichUniversität ZürichUniversità di Zurigohttp://www.uzh.ch/index_en.html21518330UZH0000 0004 1937 0650501100006447314803Q206702grid.7400.3
41https://ror.org/05pmsvm27Zurich University of Applied Sciences (ZHAW)Zürcher Hochschule für Angewandte Wissenschaftenhttps://www.zhaw.ch/en/university/21520070ZHAW000000012229164430930550Q2605554grid.19739.35
42https://ror.org/02ejkey04Zurich University of Applied Sciences in Busin...Hochschule für Wirtschaft Zürichhttp://www.fh-hwz.ch/en21519860HWZ0000 0001 0008 371330805829Q1488771grid.449909.9
43https://ror.org/01awgk221Zurich University of Teacher Education (PHZH)Pädagogische Hochschule Zürichhttps://phzh.ch/en/21520020PHZHPH Zürich0000 0000 9666 1858grid.483054.e
44https://ror.org/05r0ap620Zurich University of the ArtsHaute École d'Art de ZurichZürcher Hochschule der Künstehttps://www.zhdk.ch/2152007039250592Q222450grid.449912.3
-
- - - - -```python -organization = organization.reset_index(drop=True) -organization -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rornamelabel_enlabel_frlabel_delabel_itwebsitecountrystarting_yearis_funderacronymaliasesisnifundreforgrefwikidatagrid
0https://ror.org/02s376052École Polytechnique Fédérale de Lausanne (EPFL)Swiss Federal Institute of Technology in Lausannehttp://www.epfl.ch/index.en.html21518530EPFL000000012183904950110000170371968Q262760grid.5333.6
1https://ror.org/01swzsf04University of Geneva (UNIGE)Université de GenèveUniversità di Ginevrahttps://www.unige.ch/21515590UNIGESchola Genevensis0000 0001 2322 4988501100006389342348Q503473grid.8591.5
2https://ror.org/04d8ztx87Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...215185000000 0004 4681 910XQ397466grid.417771.3
3https://ror.org/02bnkt322Bern University of Applied Sciences (BFH)Haute école spécialisée bernoiseBerner Fachhochschulehttp://www.bfh.ch/en/home.html21519970BFH0000 0001 0688 67795011000062594365265Q466455grid.424060.4
4https://ror.org/05a28rw58ETH Zurich (ETH Zurich)École Polytechnique Fédérale de ZurichEidgenössische Technische Hochschule ZürichPolitecnico federale di Zurigohttps://www.ethz.ch/en.html21518550ETH ZurichSwiss Federal Institute of Technology in Zuric...0000 0001 2156 2780501100003006210910Q11942grid.5801.c
5https://ror.org/01ggx4157European Organization for Nuclear Research (CERN)Organisation européenne pour la recherche nucl...Europäische Organisation für Kernforschunghttp://home.web.cern.ch/21519540CERN0000 0001 2156 142X37351Q42944grid.9132.9
6https://ror.org/0210tb741Forschungsinstitut für biologischen Landbau (F...https://www.fibl.org/en/germany/location-de.html2150FiBLgrid.506220.3
7https://ror.org/007ygn379Graduate Institute of International and Develo...Institut de Hautes études Internationales et d...Hochschulinstitut für internationale Studien u...http://graduateinstitute.ch/home.html21519270IHEIDGraduate Institute Geneva0000 0001 2296 987314744053Q691686grid.424404.2
8https://ror.org/015pmkr43Haute École Pédagogique BEJUNE (HEP BEJUNE)http://www.hep-bejune.ch/21520010HEP BEJUNE0000 0001 0658 3479grid.469449.2
9https://ror.org/048gre751Haute École Pédagogique Fribourg (HEP-PH FR)https://www.hepfr.ch/21519900HEP-PH FR0000 0001 0266 4909grid.469451.b
10https://ror.org/01bvm0h13Haute École Pédagogique du Canton de Vaud (HEP...http://www.hepl.ch/cms/accueil.html21520010HEP Vaud0000 0004 0613 4050grid.466224.0
11https://ror.org/049c2kr37Kalaidos University of Applied Sciences (Kalai...Kalaidos Fachhochschulehttps://www.kalaidos-fh.ch/de-CH21519950Kalaidos UAS0000 0004 0453 90546746630Q681372grid.449532.d
12https://ror.org/021f7p178Lib4RI - Library for the Research Institutes w...http://www.lib4ri.ch/21520110Lib4RI0000 0004 0624 8541Q1278450grid.458352.d
13https://ror.org/04nd0xd48Lucerne University of Applied Sciences and ArtsHaute École de lucerneHochschule Luzernhttps://www.hslu.ch/en/215199700000 0001 2191 894319480920Q664028grid.425064.1
14https://ror.org/00p9jf779Medicines for Malaria Venture (MMV)http://www.mmv.org/21519990MMV0000 0004 0432 5267501100004167Q6806774grid.452605.0
15https://ror.org/038mj2660Ostschweizer Fachhochschule OSTEastern Switzerland University of Applied Scie...https://www.ost.ch/21519990grid.510272.3
16https://ror.org/05jf1ma54Pädagogische Hochschule BernBern University of Teacher Educationhttps://www.phbern.ch21520050PHBern0000 0000 8585 5665grid.454333.6
17https://ror.org/02fjgft97Pädagogische Hochschule Graubünden (PHGR)Alta scuola pedagogica dei Grigionihttp://www.phgr.ch/2150PHGR0000 0000 9317 283Xgrid.469478.0
18https://ror.org/03fs41j10Pädagogische Hochschule Schaffhausen (PHSH)http://www.phsh.ch/21520030PHSH0000 0004 0450 7546grid.466133.5
19https://ror.org/04bf6dq94Pädagogische Hochschule Thurgau (PHTG)http://www.phtg.ch/home/21520030PHTG0000 0004 0613 3824grid.466322.7
20https://ror.org/040gs8e06Pädagogische Hochschule Wallis (PH-VS)Haute École Pédagogique du Valaishttp://www.hepvs.ch/de21520000PH-VS0000 0001 2178 3217grid.466216.1
21https://ror.org/00rqdn375Schwyz University of Teacher Education (PHSZ)Pädagogische Hochschule Schwyzhttps://www.phsz.ch/en/2150PHSZPHZ Schwyz0000 0004 0613 7454grid.466169.a
22https://ror.org/05m37v666St.Gallen University of Teacher Education (PHSG)Pädagogische Hochschule St. Gallenhttps://www.phsg.ch/en21520070PHSG0000 0001 0271 5139Q1768652grid.466208.e
23https://ror.org/00zg4za48Swiss Federal Institute for Vocational Educati...Institut Fédéral des Hautes Études en Formatio...Eidgenössisches Hochschulinstitut für Berufsbi...http://www.ehb-schweiz.ch/en/21520070SFIVET0000 0001 2285 5681Q1302632grid.466173.1
24https://ror.org/03mcsbr76Swiss Ornithological InstituteSchweizerische Vogelwartehttp://www.vogelwarte.ch/de/home/215192400000 0001 1512 3677Q663638grid.419767.a
25https://ror.org/03c4atk17Universita della Svizzera Italiana (USI)University of Italian SwitzerlandUniversité de la suisse italienneUniversità della Svizzera italianahttp://www.usi.ch/en/index.htm21519960USI0000 0001 2203 28612290642Q689617grid.29078.34
26https://ror.org/04mq2g308University of Applied Sciences and Arts Northw...http://www.fhnw.ch/homepage21520060FHNWFachhochschule Nordwestschweiz0000 0001 1497 8091grid.410380.e
27https://ror.org/01xkakk17University of Applied Sciences and Arts Wester...Haute École Spécialisée de Suisse OccidentaleFachhochschule Westschweizhttp://www.hes-so.ch/en/homepage-hes-so-1679.html21519980HES-SO0000 0001 0943 199910128956Q168003grid.5681.a
28https://ror.org/05ep8g269University of Applied Sciences and Arts of Sou...Scuola Universitaria Professionale della Svizz...http://www.supsi.ch/home_en.html21519970SUPSI000000012325223334066841Q663984grid.16058.3a
29https://ror.org/00w9q2c06University of Applied Sciences of Special Need...Interkantonale Hochschule für Heilpädagogikhttp://www.hfh.ch/en/21519240HfHZurich Training College for Teachers of Specia...0000 0001 0710 6332grid.466279.8
30https://ror.org/032ymzc07University of Applied Sciences of the GrisonsFachhochschule Graubündenhttps://www.fhgr.ch/en/21519630Hochschule für Technik und Wirtschaft Chur0000 0000 8718 2812Q1622220grid.460104.7
31https://ror.org/02s6k3f65University of BaselUniversité de bâleUniversität BaselUniversità di Basileahttps://www.unibas.ch/de215146000000 0004 1937 0642100008375427614Q372608grid.6612.3
32https://ror.org/02k7v4d05University of Bern (UB)Université de BerneUniversität BernUniversità di Bernahttp://www.unibe.ch/eng/21518340UB0000 0001 0726 51571000090681157515Q659080grid.5734.5
33https://ror.org/022fs9h90University of FribourgUniversité de FribourgUniversität FreiburgUniversità di Friburgohttp://www.unifr.ch/home/welcomeE.php215188900000 0004 0478 1713501100005869535267Q36188grid.8534.a
34https://ror.org/019whta54University of Lausanne (UNIL)Université de LausanneUniversität LausanneUniversità di Losannahttp://www.unil.ch/central/en/home.html21515370UNILSchola Lausannensis0000 0001 2165 420450110000639079810Q658975grid.9851.5
35https://ror.org/01qjrx392University of LiechtensteinUniversität Liechtensteinhttps://www.uni.li/study/de/128196100000 0001 2227 466810554064Q974328grid.445905.9
36https://ror.org/00kgrkn83University of Lucerne (UNILU)Université de lucerneUniversität LuzernUniversità di Lucernahttp://www.unilu.ch/21520000UNILU0000 0001 1456 793821004764Q673308grid.449852.6
37https://ror.org/00vasag41University of NeuchâtelUniversité de neuchâtelUniversität Neuenburghttp://www2.unine.ch/215183800000 0001 2297 77185011000053533662101Q541548grid.10711.36
38https://ror.org/0561a3s31University of St. Gallen (HSG)Université de saint-gallUniversität St. GallenUniversità di San Gallohttp://www.es.unisg.ch/en/21518980HSG0000 0001 2156 6618100009572751473Q673354grid.15775.31
39https://ror.org/0235ynq74University of Teacher Education LucernePädagogische Hochschule Luzernhttp://www.phlu.ch/ute-lucerne/21520030PH Luzern0000 0001 0348 1637grid.465965.d
40https://ror.org/05ghhx264University of Teacher Education Zug (PH Zug)Pädagogische Hochschule Zughttps://www.zg.ch/behoerden/direktion-fur-bild...21520130PH Zug0000 0004 0449 2225grid.466274.5
41https://ror.org/02crff812University of Zurich (UZH)Université de zurichUniversität ZürichUniversità di Zurigohttp://www.uzh.ch/index_en.html21518330UZH0000 0004 1937 0650501100006447314803Q206702grid.7400.3
42https://ror.org/05pmsvm27Zurich University of Applied Sciences (ZHAW)Zürcher Hochschule für Angewandte Wissenschaftenhttps://www.zhaw.ch/en/university/21520070ZHAW000000012229164430930550Q2605554grid.19739.35
43https://ror.org/02ejkey04Zurich University of Applied Sciences in Busin...Hochschule für Wirtschaft Zürichhttp://www.fh-hwz.ch/en21519860HWZ0000 0001 0008 371330805829Q1488771grid.449909.9
44https://ror.org/01awgk221Zurich University of Teacher Education (PHZH)Pädagogische Hochschule Zürichhttps://phzh.ch/en/21520020PHZHPH Zürich0000 0000 9666 1858grid.483054.e
45https://ror.org/05r0ap620Zurich University of the ArtsHaute École d'Art de ZurichZürcher Hochschule der Künstehttps://www.zhdk.ch/2152007039250592Q222450grid.449912.3
-
- - - - -```python -# ajout des funders -organization = organization.append(organization_funders, ignore_index=True) -organization -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
acronymaliasescountryfundrefgridis_funderisniiso_codelabel_delabel_enlabel_frlabel_itnameorgrefrorsherpa_idstarting_yearwebsitewikidata
0EPFL215501100001703grid.5333.600000000121839049NaNSwiss Federal Institute of Technology in LausanneÉcole Polytechnique Fédérale de Lausanne (EPFL)71968https://ror.org/02s376052NaN1853http://www.epfl.ch/index.en.htmlQ262760
1UNIGESchola Genevensis215501100006389grid.8591.500000 0001 2322 4988NaNUniversité de GenèveUniversità di GinevraUniversity of Geneva (UNIGE)342348https://ror.org/01swzsf04NaN1559https://www.unige.ch/Q503473
2215grid.417771.300000 0004 4681 910XNaNAgroscopehttps://ror.org/04d8ztx87NaN1850https://www.agroscope.admin.ch/agroscope/en/ho...Q397466
3BFH215501100006259grid.424060.400000 0001 0688 6779NaNBerner FachhochschuleHaute école spécialisée bernoiseBern University of Applied Sciences (BFH)4365265https://ror.org/02bnkt322NaN1997http://www.bfh.ch/en/home.htmlQ466455
4ETH ZurichSwiss Federal Institute of Technology in Zuric...215501100003006grid.5801.c00000 0001 2156 2780NaNEidgenössische Technische Hochschule ZürichÉcole Polytechnique Fédérale de ZurichPolitecnico federale di ZurigoETH Zurich (ETH Zurich)210910https://ror.org/05a28rw58NaN1855https://www.ethz.ch/en.htmlQ11942
............................................................
99NaNNaN236http://dx.doi.org/10.13039/100000104NaN1NaNUSNaNNaNNaNNaNNational Aeronautics and Space Administration ...NaNhttps://ror.org/027ka1x80986.0NaNhttp://science.nasa.gov/NaN
100NaNNaN236http://dx.doi.org/10.13039/100000001NaN1NaNUSNaNNaNNaNNaNNational Science Foundation (NSF)NaNhttps://ror.org/021nxhr62354.0NaNhttp://www.nsf.gov/NaN
101NaNNaN234http://dx.doi.org/10.13039/501100000691NaN1NaNGBNaNNaNNaNNaNAcademy of Medical ScienceNaNhttps://ror.org/00c489v881125.0NaNhttps://acmedsci.ac.uk/NaN
102NaNNaN234http://dx.doi.org/10.13039/501100000771NaN1NaNGBNaNNaNNaNNaNProstate Cancer UKNaNhttps://ror.org/04dkv6329742.0NaNhttp://prostatecanceruk.org/NaN
103NaNNaN215http://dx.doi.org/10.13039/501100001711NaN1NaNCHNaNNaNNaNNaNSchweizerischer Nationalfonds zur Förderung de...NaNhttps://ror.org/00yjd3n1325.0NaNhttp://www.snf.ch/de/Seiten/default.aspxNaN
-

104 rows × 19 columns

-
- - - - -```python -# remplacement dans le fundref id qui renvoie vers du JSON seulement -# URL actuel : http://data.crossref.org/fundingdata/funder/10.13039/[fundref id] -# ex : http://dx.doi.org/10.13039/501100007903 -# redirigé sur : http://data.crossref.org/fundingdata/funder/10.13039/501100007903 -# URL des publications financées : https://search.crossref.org/funding?q=[fundref id]&from_ui=yes -# ex : https://search.crossref.org/funding?q=501100003006&from_ui=yes -organization['fundref'] = organization['fundref'].str.replace('http://dx.doi.org/10.13039/', '') -organization -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
acronymaliasescountryfundrefgridis_funderisniiso_codelabel_delabel_enlabel_frlabel_itnameorgrefrorsherpa_idstarting_yearwebsitewikidata
0EPFL215501100001703grid.5333.600000000121839049NaNSwiss Federal Institute of Technology in LausanneÉcole Polytechnique Fédérale de Lausanne (EPFL)71968https://ror.org/02s376052NaN1853http://www.epfl.ch/index.en.htmlQ262760
1UNIGESchola Genevensis215501100006389grid.8591.500000 0001 2322 4988NaNUniversité de GenèveUniversità di GinevraUniversity of Geneva (UNIGE)342348https://ror.org/01swzsf04NaN1559https://www.unige.ch/Q503473
2215grid.417771.300000 0004 4681 910XNaNAgroscopehttps://ror.org/04d8ztx87NaN1850https://www.agroscope.admin.ch/agroscope/en/ho...Q397466
3BFH215501100006259grid.424060.400000 0001 0688 6779NaNBerner FachhochschuleHaute école spécialisée bernoiseBern University of Applied Sciences (BFH)4365265https://ror.org/02bnkt322NaN1997http://www.bfh.ch/en/home.htmlQ466455
4ETH ZurichSwiss Federal Institute of Technology in Zuric...215501100003006grid.5801.c00000 0001 2156 2780NaNEidgenössische Technische Hochschule ZürichÉcole Polytechnique Fédérale de ZurichPolitecnico federale di ZurigoETH Zurich (ETH Zurich)210910https://ror.org/05a28rw58NaN1855https://www.ethz.ch/en.htmlQ11942
............................................................
99NaNNaN236100000104NaN1NaNUSNaNNaNNaNNaNNational Aeronautics and Space Administration ...NaNhttps://ror.org/027ka1x80986.0NaNhttp://science.nasa.gov/NaN
100NaNNaN236100000001NaN1NaNUSNaNNaNNaNNaNNational Science Foundation (NSF)NaNhttps://ror.org/021nxhr62354.0NaNhttp://www.nsf.gov/NaN
101NaNNaN234501100000691NaN1NaNGBNaNNaNNaNNaNAcademy of Medical ScienceNaNhttps://ror.org/00c489v881125.0NaNhttps://acmedsci.ac.uk/NaN
102NaNNaN234501100000771NaN1NaNGBNaNNaNNaNNaNProstate Cancer UKNaNhttps://ror.org/04dkv6329742.0NaNhttp://prostatecanceruk.org/NaN
103NaNNaN215501100001711NaN1NaNCHNaNNaNNaNNaNSchweizerischer Nationalfonds zur Förderung de...NaNhttps://ror.org/00yjd3n1325.0NaNhttp://www.snf.ch/de/Seiten/default.aspxNaN
-

104 rows × 19 columns

-
- - - - -```python -# df pour l'export -organization_export = organization[['name', 'website', 'country', 'starting_year', 'is_funder', 'ror', 'fundref']] -organization_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namewebsitecountrystarting_yearis_funderrorfundref
0École Polytechnique Fédérale de Lausanne (EPFL)http://www.epfl.ch/index.en.html21518530https://ror.org/02s376052501100001703
1University of Geneva (UNIGE)https://www.unige.ch/21515590https://ror.org/01swzsf04501100006389
2Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...21518500https://ror.org/04d8ztx87
3Bern University of Applied Sciences (BFH)http://www.bfh.ch/en/home.html21519970https://ror.org/02bnkt322501100006259
4ETH Zurich (ETH Zurich)https://www.ethz.ch/en.html21518550https://ror.org/05a28rw58501100003006
........................
99National Aeronautics and Space Administration ...http://science.nasa.gov/236NaN1https://ror.org/027ka1x80100000104
100National Science Foundation (NSF)http://www.nsf.gov/236NaN1https://ror.org/021nxhr62100000001
101Academy of Medical Sciencehttps://acmedsci.ac.uk/234NaN1https://ror.org/00c489v88501100000691
102Prostate Cancer UKhttp://prostatecanceruk.org/234NaN1https://ror.org/04dkv6329501100000771
103Schweizerischer Nationalfonds zur Förderung de...http://www.snf.ch/de/Seiten/default.aspx215NaN1https://ror.org/00yjd3n13501100001711
-

104 rows × 7 columns

-
- - - - -```python -# ajout des valeurs vides -organization_export['starting_year'] = organization_export['starting_year'].fillna(0) -organization_export['fundref'] = organization_export['fundref'].fillna('') -organization_export['ror'] = organization_export['ror'].fillna('') -organization_export -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:3: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - This is separate from the ipykernel package so we can avoid doing imports until - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:4: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - after removing the cwd from sys.path. - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namewebsitecountrystarting_yearis_funderrorfundref
0École Polytechnique Fédérale de Lausanne (EPFL)http://www.epfl.ch/index.en.html21518530https://ror.org/02s376052501100001703
1University of Geneva (UNIGE)https://www.unige.ch/21515590https://ror.org/01swzsf04501100006389
2Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...21518500https://ror.org/04d8ztx87
3Bern University of Applied Sciences (BFH)http://www.bfh.ch/en/home.html21519970https://ror.org/02bnkt322501100006259
4ETH Zurich (ETH Zurich)https://www.ethz.ch/en.html21518550https://ror.org/05a28rw58501100003006
........................
99National Aeronautics and Space Administration ...http://science.nasa.gov/23601https://ror.org/027ka1x80100000104
100National Science Foundation (NSF)http://www.nsf.gov/23601https://ror.org/021nxhr62100000001
101Academy of Medical Sciencehttps://acmedsci.ac.uk/23401https://ror.org/00c489v88501100000691
102Prostate Cancer UKhttp://prostatecanceruk.org/23401https://ror.org/04dkv6329501100000771
103Schweizerischer Nationalfonds zur Förderung de...http://www.snf.ch/de/Seiten/default.aspx21501https://ror.org/00yjd3n13501100001711
-

104 rows × 7 columns

-
- - - - -```python -# ajout de l'id avec l'index + 1 -organization_export['id'] = organization_export.index + 1 -# del terms_export_dedup['index'] -organization_export -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namewebsitecountrystarting_yearis_funderrorfundrefid
0École Polytechnique Fédérale de Lausanne (EPFL)http://www.epfl.ch/index.en.html21518530https://ror.org/02s3760525011000017031
1University of Geneva (UNIGE)https://www.unige.ch/21515590https://ror.org/01swzsf045011000063892
2Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...21518500https://ror.org/04d8ztx873
3Bern University of Applied Sciences (BFH)http://www.bfh.ch/en/home.html21519970https://ror.org/02bnkt3225011000062594
4ETH Zurich (ETH Zurich)https://www.ethz.ch/en.html21518550https://ror.org/05a28rw585011000030065
...........................
99National Aeronautics and Space Administration ...http://science.nasa.gov/23601https://ror.org/027ka1x80100000104100
100National Science Foundation (NSF)http://www.nsf.gov/23601https://ror.org/021nxhr62100000001101
101Academy of Medical Sciencehttps://acmedsci.ac.uk/23401https://ror.org/00c489v88501100000691102
102Prostate Cancer UKhttp://prostatecanceruk.org/23401https://ror.org/04dkv6329501100000771103
103Schweizerischer Nationalfonds zur Förderung de...http://www.snf.ch/de/Seiten/default.aspx21501https://ror.org/00yjd3n13501100001711104
-

104 rows × 8 columns

-
- - - - -```python -# export de la table -result = organization_export.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/organization.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export excel -organization_export.to_excel('sample/organization.xlsx', index=False) -``` - - -```python -# export csv -organization_export.to_csv('sample/organization.tsv', index=False) -``` - -## Table condition_set_term - - -```python -term_orig -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licenceir_archiving
01.019999990True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-5068777248818105392-81946125451688170121
12.0299999912True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-118714631786122957710807856572614408351
23.033550True1532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN-6827815856646016670-44106140441472479071
34.033550True2532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN5388365857945903435-4928686093300740071
45.019999990False999999498.0NaNNaNNon institutional archiving locations: ChemRxi...NaNNaNNaN-27818217695488029669357667652881371100
...................................................
48673NaN358160True5592.0NaNhttps://ror.org/01swzsf04Cambridge University Press (CUP) Read & Publis...40079.02021-01-012023-12-31768737782784609585522984959422009563581
48674NaN358160True5592.0NaNhttps://ror.org/019whta54Cambridge University Press (CUP) Read & Publis...40080.02021-01-012023-12-31768737782784609585522984959422009563581
48675NaN358160True5592.0NaNhttps://ror.org/00vasag41Cambridge University Press (CUP) Read & Publis...40081.02021-01-012023-12-31768737782784609585522984959422009563581
48676NaN358160True5592.0NaNhttps://ror.org/05r0ap620Cambridge University Press (CUP) Read & Publis...40082.02021-01-012023-12-31768737782784609585522984959422009563581
48677NaN358160True5592.0NaNhttps://ror.org/05pmsvm27Cambridge University Press (CUP) Read & Publis...40083.02021-01-012023-12-31768737782784609585522984959422009563581
-

48678 rows × 16 columns

-
- - - - -```python -terms_export_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecommentidsource
01.0NaN-5068777248818105392-8194612545168817012199999901999999Institutional archiving locations: Non-Commerc...1
12.0NaN-118714631786122957710807856572614408352999999121999999Institutional archiving locations: Non-Commerc...2
23.0NaN-6827815856646016670-44106140441472479073355011Institutional archiving locations: Any Website...3
34.0NaN5388365857945903435-4928686093300740073355012Institutional archiving locations: Any Website...4
45.0NaN-2781821769548802966935766765288137110199999900999999Non institutional archiving locations: ChemRxi...5
.......................................
1315NaN1.0-6020029623494903364-54358862379916614973581011Elsevier Read & Publish agreement1316
1316NaN18129.0-195526209948827643863594828014331812613581011NaN1317
1317NaN24845.0-68145539732308387052650796891404219893581011Wiley Read & Publish agreement1318
1318NaN38750.06747956201225830719-46487586084290985343581011Taylor and Francis Read & Publish agreement1319
1319NaN39164.07687377827846095855229848806545540740235816011Cambridge University Press (CUP) Read & Publis...1320
-

1320 rows × 12 columns

-
- - - - -```python -# merge des terms id -term_orig = pd.merge(term_orig, terms_export_dedup[['id_content_hash', 'id']], on='id_content_hash', how='left') -term_orig -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licenceir_archivingid
01.019999990True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-5068777248818105392-819461254516881701211
12.0299999912True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-1187146317861229577108078565726144083512
23.033550True1532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN-6827815856646016670-441061404414724790713
34.033550True2532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN5388365857945903435-49286860933007400714
45.019999990False999999498.0NaNNaNNon institutional archiving locations: ChemRxi...NaNNaNNaN-278182176954880296693576676528813711005
......................................................
48673NaN358160True5592.0NaNhttps://ror.org/01swzsf04Cambridge University Press (CUP) Read & Publis...40079.02021-01-012023-12-317687377827846095855229849594220095635811320
48674NaN358160True5592.0NaNhttps://ror.org/019whta54Cambridge University Press (CUP) Read & Publis...40080.02021-01-012023-12-317687377827846095855229849594220095635811320
48675NaN358160True5592.0NaNhttps://ror.org/00vasag41Cambridge University Press (CUP) Read & Publis...40081.02021-01-012023-12-317687377827846095855229849594220095635811320
48676NaN358160True5592.0NaNhttps://ror.org/05r0ap620Cambridge University Press (CUP) Read & Publis...40082.02021-01-012023-12-317687377827846095855229849594220095635811320
48677NaN358160True5592.0NaNhttps://ror.org/05pmsvm27Cambridge University Press (CUP) Read & Publis...40083.02021-01-012023-12-317687377827846095855229849594220095635811320
-

48678 rows × 17 columns

-
- - - - -```python -term_orig = term_orig.rename(columns = {'id' : 'term'}) -term_orig -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licenceir_archivingterm
01.019999990True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-5068777248818105392-819461254516881701211
12.0299999912True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-1187146317861229577108078565726144083512
23.033550True1532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN-6827815856646016670-441061404414724790713
34.033550True2532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN5388365857945903435-49286860933007400714
45.019999990False999999498.0NaNNaNNon institutional archiving locations: ChemRxi...NaNNaNNaN-278182176954880296693576676528813711005
......................................................
48673NaN358160True5592.0NaNhttps://ror.org/01swzsf04Cambridge University Press (CUP) Read & Publis...40079.02021-01-012023-12-317687377827846095855229849594220095635811320
48674NaN358160True5592.0NaNhttps://ror.org/019whta54Cambridge University Press (CUP) Read & Publis...40080.02021-01-012023-12-317687377827846095855229849594220095635811320
48675NaN358160True5592.0NaNhttps://ror.org/00vasag41Cambridge University Press (CUP) Read & Publis...40081.02021-01-012023-12-317687377827846095855229849594220095635811320
48676NaN358160True5592.0NaNhttps://ror.org/05r0ap620Cambridge University Press (CUP) Read & Publis...40082.02021-01-012023-12-317687377827846095855229849594220095635811320
48677NaN358160True5592.0NaNhttps://ror.org/05pmsvm27Cambridge University Press (CUP) Read & Publis...40083.02021-01-012023-12-317687377827846095855229849594220095635811320
-

48678 rows × 17 columns

-
- - - - -```python -condition_type -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - -
idcondition_issuer
01Journal-only
12Organization-only
23Journal-organization agreement
-
- - - - -```python -# merge des condition type -term_orig['condition_type'] = 3 -term_orig.loc[term_orig['ror'].isna(), 'condition_type'] = 1 -term_orig -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licenceir_archivingtermcondition_type
01.019999990True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-5068777248818105392-8194612545168817012111
12.0299999912True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-11871463178612295771080785657261440835121
23.033550True1532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN-6827815856646016670-4410614044147247907131
34.033550True2532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN5388365857945903435-492868609330074007141
45.019999990False999999498.0NaNNaNNon institutional archiving locations: ChemRxi...NaNNaNNaN-2781821769548802966935766765288137110051
.........................................................
48673NaN358160True5592.0NaNhttps://ror.org/01swzsf04Cambridge University Press (CUP) Read & Publis...40079.02021-01-012023-12-3176873778278460958552298495942200956358113203
48674NaN358160True5592.0NaNhttps://ror.org/019whta54Cambridge University Press (CUP) Read & Publis...40080.02021-01-012023-12-3176873778278460958552298495942200956358113203
48675NaN358160True5592.0NaNhttps://ror.org/00vasag41Cambridge University Press (CUP) Read & Publis...40081.02021-01-012023-12-3176873778278460958552298495942200956358113203
48676NaN358160True5592.0NaNhttps://ror.org/05r0ap620Cambridge University Press (CUP) Read & Publis...40082.02021-01-012023-12-3176873778278460958552298495942200956358113203
48677NaN358160True5592.0NaNhttps://ror.org/05pmsvm27Cambridge University Press (CUP) Read & Publis...40083.02021-01-012023-12-3176873778278460958552298495942200956358113203
-

48678 rows × 18 columns

-
- - - - -```python -organization_export -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
namewebsitecountrystarting_yearis_funderrorfundrefid
0École Polytechnique Fédérale de Lausanne (EPFL)http://www.epfl.ch/index.en.html21518530https://ror.org/02s3760525011000017031
1University of Geneva (UNIGE)https://www.unige.ch/21515590https://ror.org/01swzsf045011000063892
2Agroscopehttps://www.agroscope.admin.ch/agroscope/en/ho...21518500https://ror.org/04d8ztx873
3Bern University of Applied Sciences (BFH)http://www.bfh.ch/en/home.html21519970https://ror.org/02bnkt3225011000062594
4ETH Zurich (ETH Zurich)https://www.ethz.ch/en.html21518550https://ror.org/05a28rw585011000030065
...........................
99National Aeronautics and Space Administration ...http://science.nasa.gov/23601https://ror.org/027ka1x80100000104100
100National Science Foundation (NSF)http://www.nsf.gov/23601https://ror.org/021nxhr62100000001101
101Academy of Medical Sciencehttps://acmedsci.ac.uk/23401https://ror.org/00c489v88501100000691102
102Prostate Cancer UKhttp://prostatecanceruk.org/23401https://ror.org/04dkv6329501100000771103
103Schweizerischer Nationalfonds zur Förderung de...http://www.snf.ch/de/Seiten/default.aspx21501https://ror.org/00yjd3n13501100001711104
-

104 rows × 8 columns

-
- - - - -```python -# merge des organizations -term_orig = pd.merge(term_orig, organization_export[['ror', 'id']], on='ror', how='left') -term_orig -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licenceir_archivingtermcondition_typeid
01.019999990True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-5068777248818105392-8194612545168817012111NaN
12.0299999912True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-11871463178612295771080785657261440835121NaN
23.033550True1532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN-6827815856646016670-4410614044147247907131NaN
34.033550True2532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN5388365857945903435-492868609330074007141NaN
45.019999990False999999498.0NaNNaNNon institutional archiving locations: ChemRxi...NaNNaNNaN-2781821769548802966935766765288137110051NaN
............................................................
48673NaN358160True5592.0NaNhttps://ror.org/01swzsf04Cambridge University Press (CUP) Read & Publis...40079.02021-01-012023-12-31768737782784609585522984959422009563581132032.0
48674NaN358160True5592.0NaNhttps://ror.org/019whta54Cambridge University Press (CUP) Read & Publis...40080.02021-01-012023-12-317687377827846095855229849594220095635811320335.0
48675NaN358160True5592.0NaNhttps://ror.org/00vasag41Cambridge University Press (CUP) Read & Publis...40081.02021-01-012023-12-317687377827846095855229849594220095635811320338.0
48676NaN358160True5592.0NaNhttps://ror.org/05r0ap620Cambridge University Press (CUP) Read & Publis...40082.02021-01-012023-12-317687377827846095855229849594220095635811320346.0
48677NaN358160True5592.0NaNhttps://ror.org/05pmsvm27Cambridge University Press (CUP) Read & Publis...40083.02021-01-012023-12-317687377827846095855229849594220095635811320343.0
-

48678 rows × 19 columns

-
- - - - -```python -term_orig = term_orig.rename(columns = {'id' : 'organization'}) -term_orig -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherpaversioncost_factorembargo_monthsarchivinglicencejournalprerequisite_fundersrorcommentrp_idvalid_fromvalid_untilid_content_hashid_content_hash_licenceir_archivingtermcondition_typeorganization
01.019999990True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-5068777248818105392-8194612545168817012111NaN
12.0299999912True999999532.0NaNNaNInstitutional archiving locations: Non-Commerc...NaNNaNNaN-11871463178612295771080785657261440835121NaN
23.033550True1532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN-6827815856646016670-4410614044147247907131NaN
34.033550True2532.0NaNNaNInstitutional archiving locations: Any Website...NaNNaNNaN5388365857945903435-492868609330074007141NaN
45.019999990False999999498.0NaNNaNNon institutional archiving locations: ChemRxi...NaNNaNNaN-2781821769548802966935766765288137110051NaN
............................................................
48673NaN358160True5592.0NaNhttps://ror.org/01swzsf04Cambridge University Press (CUP) Read & Publis...40079.02021-01-012023-12-31768737782784609585522984959422009563581132032.0
48674NaN358160True5592.0NaNhttps://ror.org/019whta54Cambridge University Press (CUP) Read & Publis...40080.02021-01-012023-12-317687377827846095855229849594220095635811320335.0
48675NaN358160True5592.0NaNhttps://ror.org/00vasag41Cambridge University Press (CUP) Read & Publis...40081.02021-01-012023-12-317687377827846095855229849594220095635811320338.0
48676NaN358160True5592.0NaNhttps://ror.org/05r0ap620Cambridge University Press (CUP) Read & Publis...40082.02021-01-012023-12-317687377827846095855229849594220095635811320346.0
48677NaN358160True5592.0NaNhttps://ror.org/05pmsvm27Cambridge University Press (CUP) Read & Publis...40083.02021-01-012023-12-317687377827846095855229849594220095635811320343.0
-

48678 rows × 19 columns

-
- - - - -```python -# concat valeurs avec même id -condition_set_term_dedup_terms = term_orig[['term', 'id_content_hash']] -condition_set_term_dedup_terms_dedup = condition_set_term_dedup_terms.drop_duplicates() -condition_set_term_dedup_terms_dedup = condition_set_term_dedup_terms_dedup.loc[condition_set_term_dedup_terms_dedup['term'].notna()] -condition_set_term_dedup_terms_dedup['term'] = condition_set_term_dedup_terms_dedup['term'].astype(int) -condition_set_term_dedup_terms_dedup['term'] = condition_set_term_dedup_terms_dedup['term'].astype(str) -condition_set_term_dedup_terms_dedup = condition_set_term_dedup_terms_dedup.groupby('id_content_hash').agg({'term': lambda x: ', '.join(x)}) -condition_set_term_dedup_terms_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
term
id_content_hash
-9213354388875732238271
-92000707444225583771039
-91717831170231043951175
-91349526464689481631283
-91330136487514062891106
......
91950013304323528931103
9200466168345981543250
9213878808178729253580
921838920891277788238
9219045216097074691919
-

1320 rows × 1 columns

-
- - - - -```python -# concat valeurs avec même id -condition_set_term_dedup_journals = term_orig[['journal', 'id_content_hash']] -condition_set_term_dedup_journals_dedup = condition_set_term_dedup_journals.drop_duplicates() -condition_set_term_dedup_journals_dedup = condition_set_term_dedup_journals_dedup.loc[condition_set_term_dedup_journals_dedup['journal'].notna()] -condition_set_term_dedup_journals_dedup['journal'] = condition_set_term_dedup_journals_dedup['journal'].astype(int) -condition_set_term_dedup_journals_dedup['journal'] = condition_set_term_dedup_journals_dedup['journal'].astype(str) -condition_set_term_dedup_journals_dedup = condition_set_term_dedup_journals_dedup.groupby('id_content_hash').agg({'journal': lambda x: ', '.join(x)}) -condition_set_term_dedup_journals_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
journal
id_content_hash
-9213354388875732238342, 219, 18, 918, 309, 543, 642, 27, 246, 64,...
-9200070744422558377427
-9171783117023104395548, 240, 298, 132, 3, 516
-9134952646468948163990
-9133013648751406289366
......
9195001330432352893687
9200466168345981543230
9213878808178729253722
9218389208912777882199
9219045216097074691190
-

1320 rows × 1 columns

-
- - - - -```python -# concat valeurs avec même id -condition_set_term_dedup_organizations = term_orig[['organization', 'id_content_hash']] -condition_set_term_dedup_organizations_dedup = condition_set_term_dedup_organizations.drop_duplicates() -condition_set_term_dedup_organizations_dedup = condition_set_term_dedup_organizations_dedup.loc[condition_set_term_dedup_organizations_dedup['organization'].notna()] -condition_set_term_dedup_organizations_dedup['organization'] = condition_set_term_dedup_organizations_dedup['organization'].astype(int) -condition_set_term_dedup_organizations_dedup['organization'] = condition_set_term_dedup_organizations_dedup['organization'].astype(str) -condition_set_term_dedup_organizations_dedup = condition_set_term_dedup_organizations_dedup.groupby('id_content_hash').agg({'organization': lambda x: ', '.join(x)}) -condition_set_term_dedup_organizations_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
organization
id_content_hash
-921335438887573223875, 76, 77, 78
-920007074442255837747
-913495264646894816348, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59...
-913301364875140628948, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59...
-908512951995045593848, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59...
......
874525338389352471948, 64, 51, 74, 68, 67, 69, 59
891340129846520381148, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59...
899944714990810149548, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59...
919500133043235289348, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59...
921904521609707469148, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59...
-

277 rows × 1 columns

-
- - - - -```python -# concat valeurs avec même id : pas possible pour condition_type -condition_set_term_dedup_condition_types = term_orig[['condition_type', 'id_content_hash']] -condition_set_term_dedup_condition_types_dedup = condition_set_term_dedup_condition_types.drop_duplicates() -condition_set_term_dedup_condition_types_dedup = condition_set_term_dedup_condition_types_dedup.loc[condition_set_term_dedup_condition_types_dedup['condition_type'].notna()] -# condition_set_term_dedup_condition_types_dedup['condition_type'] = condition_set_term_dedup_condition_types_dedup['condition_type'].astype(int) -# condition_set_term_dedup_condition_types_dedup['condition_type'] = condition_set_term_dedup_condition_types_dedup['condition_type'].astype(str) -# condition_set_term_dedup_condition_types_dedup = condition_set_term_dedup_condition_types_dedup.groupby('id_content_hash').agg({'condition_type': lambda x: ', '.join(x)}) -condition_set_term_dedup_condition_types_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_typeid_content_hash
01-5068777248818105392
11-1187146317861229577
21-6827815856646016670
315388365857945903435
41-2781821769548802966
.........
334393-681455397323083870
4734436747956201225830719
4736216747956201225830719
4775837687377827846095855
4777617687377827846095855
-

1533 rows × 2 columns

-
- - - - -```python -# recuperation des ids groupés -terms_export_dedup = pd.merge(terms_export_dedup, condition_set_term_dedup_terms_dedup, on='id_content_hash', how='left') -terms_export_dedup = pd.merge(terms_export_dedup, condition_set_term_dedup_journals_dedup, on='id_content_hash', how='left') -terms_export_dedup = pd.merge(terms_export_dedup, condition_set_term_dedup_organizations_dedup, on='id_content_hash', how='left') -terms_export_dedup = pd.merge(terms_export_dedup, condition_set_term_dedup_condition_types_dedup, on='id_content_hash', how='left') -terms_export_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_sherparp_idid_content_hashid_content_hash_licenceversioncost_factorembargo_monthsir_archivinglicencecommentidsourcetermjournalorganizationcondition_type
01.0NaN-5068777248818105392-8194612545168817012199999901999999Institutional archiving locations: Non-Commerc...11532, 482, 452, 663, 323, 674, 317, 154, 439, 5...NaN1
12.0NaN-118714631786122957710807856572614408352999999121999999Institutional archiving locations: Non-Commerc...22532, 482, 452, 663, 323, 674, 317, 154, 439, 5...NaN1
23.0NaN-6827815856646016670-44106140441472479073355011Institutional archiving locations: Any Website...33532NaN1
34.0NaN5388365857945903435-4928686093300740073355012Institutional archiving locations: Any Website...44532NaN1
45.0NaN-2781821769548802966935766765288137110199999900999999Non institutional archiving locations: ChemRxi...55498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...NaN1
...................................................
1528NaN24845.0-68145539732308387052650796891404219893581011Wiley Read & Publish agreement13181318942, 854, 933, 297, 130, 144, 549, 283, 512, 1...3, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...3
1529NaN38750.06747956201225830719-46487586084290985343581011Taylor and Francis Read & Publish agreement13191319714, 633, 48, 704, 408, 535, 754, 581, 9793, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...3
1530NaN38750.06747956201225830719-46487586084290985343581011Taylor and Francis Read & Publish agreement13191319714, 633, 48, 704, 408, 535, 754, 581, 9793, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1
1531NaN39164.07687377827846095855229848806545540740235816011Cambridge University Press (CUP) Read & Publis...13201320866, 171, 186, 839, 5923, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...3
1532NaN39164.07687377827846095855229848806545540740235816011Cambridge University Press (CUP) Read & Publis...13201320866, 171, 186, 839, 5923, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1
-

1533 rows × 16 columns

-
- - - - -```python -condition_sets_orig = terms_export_dedup[['term', 'condition_type', 'organization', 'journal']] -condition_sets_orig -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
termcondition_typeorganizationjournal
011NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...
121NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...
231NaN532
341NaN532
451NaN498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...
...............
1528131833, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...942, 854, 933, 297, 130, 144, 549, 283, 512, 1...
1529131933, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 979
1530131913, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 979
1531132033, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 592
1532132013, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 592
-

1533 rows × 4 columns

-
- - - - -```python -# ajout d'un hash unique pour chaque variante -condition_sets_orig['id_term_hash'] = condition_sets_orig.apply(lambda x: hash(tuple(x[['condition_type', 'organization', 'journal']])), axis = 1) -condition_sets_orig -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
termcondition_typeorganizationjournalid_term_hash
011NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-5197283134070040275
121NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-5197283134070040275
231NaN532-3428409893954144223
341NaN532-3428409893954144223
451NaN498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...5362274893926121442
..................
1528131833, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...942, 854, 933, 297, 130, 144, 549, 283, 512, 1...-32115995447722756
1529131933, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 9794789694892756018439
1530131913, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 9797722626036678389533
1531132033, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 5926902392350219571553
1532132013, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 5924611302665250055299
-

1533 rows × 5 columns

-
- - - - -```python -# grouper les termes qui ont les mêmes valeurs pour le reste -condition_sets_orig_terms = condition_sets_orig[['term', 'id_term_hash']] -condition_sets_orig_terms_dedup = condition_sets_orig_terms.drop_duplicates() -condition_sets_orig_terms_dedup = condition_sets_orig_terms_dedup.loc[condition_sets_orig_terms_dedup['term'].notna()] -condition_sets_orig_terms_dedup['term'] = condition_sets_orig_terms_dedup['term'].astype(int) -condition_sets_orig_terms_dedup['term'] = condition_sets_orig_terms_dedup['term'].astype(str) -condition_sets_orig_terms_dedup = condition_sets_orig_terms_dedup.groupby('id_term_hash').agg({'term': lambda x: ', '.join(x)}) -condition_sets_orig_terms_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
term
id_term_hash
-9221122160312283608796
-9194263828544732083812
-91929449611264080891246
-9191653994283170820965
-91807822994803644411185
......
9197647807999611822421
9200686802301911565359
92032187412307672131056
9211734360905731286630, 631
9214772761176685077706
-

1149 rows × 1 columns

-
- - - - -```python -# ajout des ids groupées -condition_sets_orig_terms = pd.merge(condition_sets_orig, condition_sets_orig_terms_dedup, on='id_term_hash', how='left') -condition_sets_orig_terms -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
term_xcondition_typeorganizationjournalid_term_hashterm_y
011NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 2
121NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 2
231NaN532-34284098939541442233, 4
341NaN532-34284098939541442233, 4
451NaN498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...53622748939261214425, 6
.....................
1528131833, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...942, 854, 933, 297, 130, 144, 549, 283, 512, 1...-321159954477227561318
1529131933, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 97947896948927560184391319
1530131913, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 97977226260366783895331319
1531132033, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 59269023923502195715531320
1532132013, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 59246113026652500552991320
-

1533 rows × 6 columns

-
- - - - -```python -# rename terms -del condition_sets_orig_terms['term_x'] -condition_sets_orig_terms = condition_sets_orig_terms.rename(columns = {'term_y' : 'term'}) -condition_sets_orig_terms -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashterm
01NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 2
11NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 2
21NaN532-34284098939541442233, 4
31NaN532-34284098939541442233, 4
41NaN498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...53622748939261214425, 6
..................
152833, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...942, 854, 933, 297, 130, 144, 549, 283, 512, 1...-321159954477227561318
152933, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 97947896948927560184391319
153013, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 97977226260366783895331319
153133, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 59269023923502195715531320
153213, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 59246113026652500552991320
-

1533 rows × 5 columns

-
- - - - -```python -# test duplicates -condition_sets_orig_terms.loc[condition_sets_orig_terms.duplicated()].sort_values(by='term') -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashterm
11NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 2
11871NaN779-91040221086658593781001, 1002, 1003
11881NaN779-91040221086658593781001, 1002, 1003
11901NaN7, 22-57959714025828680511004, 1005
11941NaN825-29857252040668413361008, 1009
..................
11611NaN855158530994336307876978, 979
11681NaN654-5164377982436891368984, 985
11791NaN751-1857992192228010123993, 994, 995
11801NaN751-1857992192228010123993, 994, 995
11821NaN531-3353627437951234546996, 997
-

384 rows × 5 columns

-
- - - - -```python -condition_sets_orig_terms.loc[condition_sets_orig_terms.duplicated()].shape[0] -``` - - - - - 384 - - - - -```python -condition_sets_orig_terms_dedup = condition_sets_orig_terms.drop_duplicates() -condition_sets_orig_terms_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashterm
01NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 2
21NaN532-34284098939541442233, 4
41NaN498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...53622748939261214425, 6
61NaN498-7139474688484852577, 8
81NaN789-53320450395728364569, 10, 11, 12
..................
152833, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...942, 854, 933, 297, 130, 144, 549, 283, 512, 1...-321159954477227561318
152933, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 97947896948927560184391319
153013, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 97977226260366783895331319
153133, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 59269023923502195715531320
153213, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 59246113026652500552991320
-

1149 rows × 5 columns

-
- - - - -```python -# ajout des champs manquants -condition_sets_orig_terms_dedup['comment'] = '' -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - - - -```python -# remplacement des "nan" -condition_sets_orig_terms_dedup.loc[condition_sets_orig_terms_dedup['journal'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashtermcomment
-
- - - - -```python -# remplacement des "nan" -condition_sets_orig_terms_dedup.loc[condition_sets_orig_terms_dedup['term'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashtermcomment
-
- - - - -```python -# remplacement des "nan" -condition_sets_orig_terms_dedup.loc[condition_sets_orig_terms_dedup['condition_type'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashtermcomment
-
- - - - -```python -# remplacement des "nan" -condition_sets_orig_terms_dedup.loc[condition_sets_orig_terms_dedup['organization'].isna()] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashtermcomment
01NaN532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 2
21NaN532-34284098939541442233, 4
41NaN498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...53622748939261214425, 6
61NaN498-7139474688484852577, 8
81NaN789-53320450395728364569, 10, 11, 12
.....................
15151NaN87030318528692284251371306, 1307
15171NaN41-79020561546065098061308, 1309
15191NaN8076578672144179594851310, 1311
15211NaN53373038623529842952821312, 1313
15231NaN60865480185615639066771314, 1315
-

661 rows × 6 columns

-
- - - - -```python -# remplacement des "nan" -condition_sets_orig_terms_dedup['organization'] = condition_sets_orig_terms_dedup['organization'].fillna('') -condition_sets_orig_terms_dedup -``` - - C:\Users\iriarte\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: - A value is trying to be set on a copy of a slice from a DataFrame. - Try using .loc[row_indexer,col_indexer] = value instead - - See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy - - - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashtermcomment
01532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 2
21532-34284098939541442233, 4
41498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...53622748939261214425, 6
61498-7139474688484852577, 8
81789-53320450395728364569, 10, 11, 12
.....................
152833, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...942, 854, 933, 297, 130, 144, 549, 283, 512, 1...-321159954477227561318
152933, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 97947896948927560184391319
153013, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 97977226260366783895331319
153133, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 59269023923502195715531320
153213, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 59246113026652500552991320
-

1149 rows × 6 columns

-
- - - - -```python -# convertir l'index en id -condition_sets_orig_terms_dedup = condition_sets_orig_terms_dedup.reset_index() -# ajout de l'id avec l'index + 1 -condition_sets_orig_terms_dedup['id'] = condition_sets_orig_terms_dedup['index'] + 1 -del condition_sets_orig_terms_dedup['index'] -condition_sets_orig_terms_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashtermcommentid
01532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 21
11532-34284098939541442233, 43
21498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...53622748939261214425, 65
31498-7139474688484852577, 87
41789-53320450395728364569, 10, 11, 129
........................
114433, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...942, 854, 933, 297, 130, 144, 549, 283, 512, 1...-3211599544772275613181529
114533, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 979478969489275601843913191530
114613, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 979772262603667838953313191531
114733, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 592690239235021957155313201532
114813, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 592461130266525005529913201533
-

1149 rows × 7 columns

-
- - - - -```python -# convertir l'index en id -condition_sets_orig_terms_dedup = condition_sets_orig_terms_dedup.reset_index() -# ajout de l'id avec l'index + 1 -condition_sets_orig_terms_dedup['id'] = condition_sets_orig_terms_dedup['index'] + 1 -del condition_sets_orig_terms_dedup['index'] -condition_sets_orig_terms_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashtermcommentid
01532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 21
11532-34284098939541442233, 42
21498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...53622748939261214425, 63
31498-7139474688484852577, 84
41789-53320450395728364569, 10, 11, 125
........................
114433, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...942, 854, 933, 297, 130, 144, 549, 283, 512, 1...-3211599544772275613181145
114533, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 979478969489275601843913191146
114613, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 979772262603667838953313191147
114733, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 592690239235021957155313201148
114813, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 592461130266525005529913201149
-

1149 rows × 7 columns

-
- - - - -```python -# export de la table -result = condition_sets_orig_terms_dedup[['id', 'condition_type', 'organization', 'journal', 'term', 'comment']].to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/condition_set.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export excel -condition_sets_orig_terms_dedup[['id', 'condition_type', 'organization', 'journal', 'term', 'comment']].to_excel('sample/condition_set.xlsx', index=False) -``` - - -```python -# export csv -condition_sets_orig_terms_dedup[['id', 'condition_type', 'organization', 'journal', 'term', 'comment']].to_csv('sample/condition_set.tsv', index=False) -``` - -## Table organization_condition_set - - -```python -condition_sets_orig_terms_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashtermcommentid
01532, 482, 452, 663, 323, 674, 317, 154, 439, 5...-51972831340700402751, 21
11532-34284098939541442233, 42
21498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...53622748939261214425, 63
31498-7139474688484852577, 84
41789-53320450395728364569, 10, 11, 125
........................
114433, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...942, 854, 933, 297, 130, 144, 549, 283, 512, 1...-3211599544772275613181145
114533, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 979478969489275601843913191146
114613, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 979772262603667838953313191147
114733, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 592690239235021957155313201148
114813, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 592461130266525005529913201149
-

1149 rows × 7 columns

-
- - - - -```python -condition_sets_orig_terms_dedup.loc[(condition_sets_orig_terms_dedup['organization'].notna()) & (condition_sets_orig_terms_dedup['organization'] != '')] -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_typeorganizationjournalid_term_hashtermcommentid
5347789-6118989085408562349136
11347668, 576, 371, 410, 849, 184, 670, 559, 58, 16...70263764888625437962212
12147668, 576, 371, 410, 849, 184, 670, 559, 58, 16...88994974481300366982213
21148, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 78985, 485, 787, 415, 189, 395, 652, 83, 227, 44...35305052837971392764222
22348, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 78985, 485, 787, 415, 189, 395, 652, 83, 227, 44...30564024657118466664223
........................
114433, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...942, 854, 933, 297, 130, 144, 549, 283, 512, 1...-3211599544772275613181145
114533, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 979478969489275601843913191146
114613, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...714, 633, 48, 704, 408, 535, 754, 581, 979772262603667838953313191147
114733, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 592690239235021957155313201148
114813, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...866, 171, 186, 839, 592461130266525005529913201149
-

488 rows × 7 columns

-
- - - - -```python -# creation du DF -# col_names = ['id', -# 'organization', -# 'condition_set', -# 'valid_from', -# 'valid_until' -# ] -# organization_condition = pd.DataFrame(columns = col_names) -organization_condition = condition_sets_orig_terms_dedup.loc[(condition_sets_orig_terms_dedup['organization'].notna()) & (condition_sets_orig_terms_dedup['organization'] != '')][['id', 'organization', 'term']] -organization_condition -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idorganizationterm
564713
11124722
12134722
212248, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842
222348, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842
............
114411453, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1318
114511463, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1319
114611473, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1319
114711483, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1320
114811493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1320
-

488 rows × 3 columns

-
- - - - -```python -# extraction des terms ids -organization_condition_split = organization_condition.assign(term = organization_condition.term.str.split(',')).explode('term') -organization_condition_split -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idorganizationterm
564713
11124722
12134722
212248, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842
222348, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842
............
114411453, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1318
114511463, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1319
114611473, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1319
114711483, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1320
114811493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1320
-

490 rows × 3 columns

-
- - - - -```python -organization_condition_split.loc[organization_condition_split['organization'].isna()] -``` - - - - -
- - - - - - - - - - - - -
idorganizationterm
-
- - - - -```python -organization_condition_split.loc[organization_condition_split['term'].isna()] -``` - - - - -
- - - - - - - - - - - - -
idorganizationterm
-
- - - - -```python -organization_condition_split['term'] = organization_condition_split['term'].astype(int) -organization_condition_split -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idorganizationterm
564713
11124722
12134722
212248, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842
222348, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842
............
114411453, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1318
114511463, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1319
114611473, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1319
114711483, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1320
114811493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...1320
-

490 rows × 3 columns

-
- - - - -```python -# ajout du ROR -terms_export_dates -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
id_content_hashrorvalid_fromvalid_untilterm
0-6020029623494903364https://ror.org/04d8ztx872020-01-012023-12-311316
1-6020029623494903364https://ror.org/02bnkt3222020-01-012023-12-311316
2-6020029623494903364https://ror.org/00zg4za482020-01-012023-12-311316
3-6020029623494903364https://ror.org/02s3760522020-01-012023-12-311316
4-6020029623494903364https://ror.org/05a28rw582020-01-012023-12-311316
..................
400787687377827846095855https://ror.org/01swzsf042021-01-012023-12-311320
400797687377827846095855https://ror.org/019whta542021-01-012023-12-311320
400807687377827846095855https://ror.org/00vasag412021-01-012023-12-311320
400817687377827846095855https://ror.org/05r0ap6202021-01-012023-12-311320
400827687377827846095855https://ror.org/05pmsvm272021-01-012023-12-311320
-

40083 rows × 5 columns

-
- - - - -```python -# merge pour obtenir les dates -organization_condition_split = pd.merge(organization_condition_split, terms_export_dates[['term', 'valid_from', 'valid_until']], on='term', how='left') -organization_condition_split -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idorganizationtermvalid_fromvalid_until
064713NaNNaN
1124722NaNNaN
2134722NaNNaN
32248, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842NaNNaN
42348, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842NaNNaN
..................
4861011493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
4861111493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
4861211493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
4861311493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
4861411493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
-

48615 rows × 5 columns

-
- - - - -```python -# dédoublonage -organization_condition_split_dedup = organization_condition_split.drop_duplicates() -organization_condition_split_dedup -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idorganizationtermvalid_fromvalid_until
064713NaNNaN
1124722NaNNaN
2134722NaNNaN
32248, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842NaNNaN
42348, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842NaNNaN
..................
3204211453, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13182021-01-012024-12-31
4594711463, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13192021-01-012023-12-31
4636111473, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13192021-01-012023-12-31
4677511483, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
4769511493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
-

490 rows × 5 columns

-
- - - - -```python -organization_condition = pd.merge(organization_condition, organization_condition_split_dedup[['id', 'valid_from', 'valid_until']], on='id', how='left') -organization_condition -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idorganizationtermvalid_fromvalid_until
064713NaNNaN
1124722NaNNaN
2134722NaNNaN
32248, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842NaNNaN
42348, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842NaNNaN
..................
48511453, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13182021-01-012024-12-31
48611463, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13192021-01-012023-12-31
48711473, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13192021-01-012023-12-31
48811483, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
48911493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
-

490 rows × 5 columns

-
- - - - -```python -organization_condition = organization_condition.rename(columns = {'id' : 'condition_set'}) -organization_condition['valid_from'] = organization_condition['valid_from'].fillna('') -organization_condition['valid_until'] = organization_condition['valid_until'].fillna('') -organization_condition -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_setorganizationtermvalid_fromvalid_until
064713
1124722
2134722
32248, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842
42348, 64, 51, 74, 68, 67, 69, 59, 75, 76, 77, 7842
..................
48511453, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13182021-01-012024-12-31
48611463, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13192021-01-012023-12-31
48711473, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13192021-01-012023-12-31
48811483, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
48911493, 4, 6, 24, 1, 5, 31, 27, 7, 8, 28, 11, 44, 1...13202021-01-012023-12-31
-

490 rows × 5 columns

-
- - - - -```python -# split final pour avoir une ligne par organization -organization_condition_fin = organization_condition.assign(organization = organization_condition.organization.str.split(',')).explode('organization') -organization_condition_fin -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_setorganizationtermvalid_fromvalid_until
064713
1124722
2134722
3224842
3226442
..................
4891149213202021-01-012023-12-31
48911493513202021-01-012023-12-31
48911493813202021-01-012023-12-31
48911494613202021-01-012023-12-31
48911494313202021-01-012023-12-31
-

6834 rows × 5 columns

-
- - - - -```python -# ajout de l'id avec l'index + 1 -organization_condition_fin = organization_condition_fin.reset_index() -organization_condition_fin['id'] = organization_condition_fin.index + 1 -del organization_condition_fin['index'] -organization_condition_fin -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_setorganizationtermvalid_fromvalid_untilid
0647131
11247222
21347223
32248424
42264425
.....................
68291149213202021-01-012023-12-316830
683011493513202021-01-012023-12-316831
683111493813202021-01-012023-12-316832
683211494613202021-01-012023-12-316833
683311494313202021-01-012023-12-316834
-

6834 rows × 6 columns

-
- - - - -```python -# export de la table -result = organization_condition_fin[['id', 'condition_set', 'organization', 'valid_from', 'valid_until']].to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/organization_condition.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export excel -organization_condition_fin[['id', 'condition_set', 'organization', 'valid_from', 'valid_until']].to_excel('sample/organization_condition.xlsx', index=False) -``` - - -```python -# export csv -organization_condition_fin[['id', 'condition_set', 'organization', 'valid_from', 'valid_until']].to_csv('sample/organization_condition.tsv', index=False) -``` - -## Table journal_condition_set - - -```python -# creation du DF -# col_names = ['id', -# 'journal', -# 'condition_set', -# 'valid_from', -# 'valid_until' -# ] -# journal_condition = pd.DataFrame(columns = col_names) -journal_condition = condition_sets_orig_terms_dedup.loc[(condition_sets_orig_terms_dedup['journal'].notna()) & (condition_sets_orig_terms_dedup['journal'] != '')][['id', 'journal']] -journal_condition -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
idjournal
01532, 482, 452, 663, 323, 674, 317, 154, 439, 5...
12532
23498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...
34498
45789
.........
11441145942, 854, 933, 297, 130, 144, 549, 283, 512, 1...
11451146714, 633, 48, 704, 408, 535, 754, 581, 979
11461147714, 633, 48, 704, 408, 535, 754, 581, 979
11471148866, 171, 186, 839, 592
11481149866, 171, 186, 839, 592
-

1149 rows × 2 columns

-
- - - - -```python -journal_condition = journal_condition.rename(columns = {'id' : 'condition_set'}) -journal_condition['valid_from'] = '' -journal_condition['valid_until'] = '' -journal_condition -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_setjournalvalid_fromvalid_until
01532, 482, 452, 663, 323, 674, 317, 154, 439, 5...
12532
23498, 70, 359, 573, 63, 66, 274, 116, 384, 163,...
34498
45789
...............
11441145942, 854, 933, 297, 130, 144, 549, 283, 512, 1...
11451146714, 633, 48, 704, 408, 535, 754, 581, 979
11461147714, 633, 48, 704, 408, 535, 754, 581, 979
11471148866, 171, 186, 839, 592
11481149866, 171, 186, 839, 592
-

1149 rows × 4 columns

-
- - - - -```python -# split final pour avoir une ligne par journal -journal_condition_fin = journal_condition.assign(journal = journal_condition.journal.str.split(',')).explode('journal') -journal_condition_fin -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_setjournalvalid_fromvalid_until
01532
01482
01452
01663
01323
...............
11481149866
11481149171
11481149186
11481149839
11481149592
-

3033 rows × 4 columns

-
- - - - -```python -# ajout de l'id avec l'index + 1 -journal_condition_fin = journal_condition_fin.reset_index() -journal_condition_fin['id'] = journal_condition_fin.index + 1 -del journal_condition_fin['index'] -journal_condition_fin -``` - - - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
condition_setjournalvalid_fromvalid_untilid
015321
114822
214523
316634
413235
..................
302811498663029
302911491713030
303011491863031
303111498393032
303211495923033
-

3033 rows × 5 columns

-
- - - - -```python -# export de la table -result = journal_condition_fin.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/journal_condition.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) -``` - - -```python -# export excel -journal_condition_fin.to_excel('sample/journal_condition.xlsx', index=False) -``` - - -```python -# export csv -journal_condition_fin.to_csv('sample/journal_condition.tsv', index=False) -``` - - -```python - -``` diff --git a/import_scripts/10_oacct_terms.py b/import_scripts/10_oacct_terms.py deleted file mode 100644 index bb7de41f..00000000 --- a/import_scripts/10_oacct_terms.py +++ /dev/null @@ -1,1975 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet de modifier les données extraites des differentes sources et les exporter dans les tables de l'application OACCT. 
-# -# Auteur : **Pablo Iriarte**, Université de Genève (pablo.iriarte@unige.ch) -# Date de dernière mise à jour : 08.09.2021 - -# In[1]: - - -import pandas as pd -import csv -import json -import numpy as np -import os -# afficher toutes les colonnes -pd.set_option('display.max_columns', None) -# definir le debut des ids -id_start = 1 - - -# ## Import du fichier extrait de Sherpa - -# In[2]: - - -sherpa = pd.read_csv('sample/sherpa_policies_brut.tsv', encoding='utf-8', header=0, sep='\t') -sherpa - - -# In[3]: - - -# test des valeurs pour les versions -sherpa['article_version'].value_counts() - - -# In[4]: - - -# test des valeurs pour les issns -sherpa.loc[sherpa['issn'].isna()] - - -# In[5]: - - -# ajout des ISSN-L -issns = pd.read_csv('issn/20171102.ISSN-to-ISSN-L.txt', encoding='utf-8', header=0, sep='\t') -issns - - -# In[6]: - - -# renommer les colonnes -issns = issns.rename(columns={'ISSN' : 'issn', 'ISSN-L' : 'issnl'}) -issns - - -# In[7]: - - -# merge avec la table sherpa -sherpa = pd.merge(sherpa, issns, on='issn', how='left') -sherpa - - -# In[8]: - - -# test des valeurs pour les issnl -sherpa.loc[sherpa['issnl'].isna()] - - -# In[9]: - - -# extraction des données IR Archiving + Embargo par ISSN -sherpa_ir = sherpa[['issnl', ]] - - -# ## Import du fichier des licences Read & Publish - -# In[10]: - - -rp = pd.read_csv('sample/read_publish_brut_merge.tsv', encoding='utf-8', header=0, sep='\t') -rp - - -# In[11]: - - -rp['embargo_months'].value_counts() - - -# In[12]: - - -# ajout de l'éditeur dans un seul champ -# rp.loc[rp['Elsevier'] == 'x', 'public_notes'] = 'Elsevier Read & Publish agreement' -rp.loc[rp['Elsevier'] == 'x', 'rp_publisher'] = 'Elsevier' -rp.loc[rp['Springer Nature'] == 'x', 'rp_publisher'] = 'Springer Nature' -rp.loc[rp['Wiley'] == 'x', 'rp_publisher'] = 'Wiley' -rp.loc[rp['TF'] == 'x', 'rp_publisher'] = 'TF' -rp.loc[rp['CUP'] == 'x', 'rp_publisher'] = 'CUP' -rp - - -# In[13]: - - -# test des valeurs pour les versions 
-rp['rp_publisher'].value_counts() - - -# In[14]: - - -# test des valeurs pour les versions -rp['license'].value_counts() - - -# In[15]: - - -# supprimer les champs inutiles et renommer les colonnes -del rp['Elsevier'] -del rp['Springer Nature'] -del rp['Wiley'] -del rp['TF'] -del rp['CUP'] -del rp['URL'] -rp - - -# In[16]: - - -# renommer les colonnes -rp = rp.rename(columns = {'Title' : 'title', 'ROR' : 'ror', 'read_publish_id' : 'rp_id'}) -rp - - -# ## Table applicable_version - -# In[17]: - - -# creation du DF -col_names = ['id', - 'type', - 'description' - ] -applicable_version = pd.DataFrame(columns = col_names) -# 3 values : published, accepted, submitted -new_row1 = {'id':1, 'type':'submitted', 'description' : 'Submitted version'} -new_row2 = {'id':2, 'type':'accepted', 'description' : 'Accepted version'} -new_row3 = {'id':3, 'type':'published', 'description' : 'Published version'} -#append row to the dataframe -applicable_version = applicable_version.append(new_row1, ignore_index=True) -applicable_version = applicable_version.append(new_row2, ignore_index=True) -applicable_version = applicable_version.append(new_row3, ignore_index=True) -applicable_version - - -# In[18]: - - -# ajout de la valeur UNKNOWN -applicable_version = applicable_version.append({'id' : 999999, 'type' : 'UNKNOWN', 'description' : 'UNKNOWN'}, ignore_index=True) -applicable_version - - -# In[19]: - - -# renommage des champs finaux -applicable_version_export = applicable_version[['id', 'description']] - - -# In[20]: - - -# export de la table applicable_version -result = applicable_version_export.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/version.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[21]: - - -# export csv -applicable_version_export.to_csv('sample/version.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[22]: - - -# export excel 
-applicable_version_export.to_excel('sample/version.xlsx', index=False) - - -# In[23]: - - -# merge avec la table sherpa -sherpa = pd.merge(sherpa, applicable_version[['id', 'type']], left_on='article_version', right_on='type', how='left') -sherpa - - -# In[24]: - - -sherpa = sherpa.rename(columns = {'id_x' : 'id', 'id_y' : 'version'}) -del sherpa['type'] -sherpa - - -# In[25]: - - -# merge avec la table read & publish -rp = pd.merge(rp, applicable_version[['id', 'type']], left_on='article_version', right_on='type', how='left') -rp - - -# In[26]: - - -rp = rp.rename(columns = {'id' : 'version'}) -del rp['type'] -rp - - -# ## Table oa_licence - -# In[27]: - - -# creation du DF -# 'version' n'est pas utilisée, on dédoublonne par nom sans la version -col_names = ['id', - 'name', - 'url' - ] -oa_licence = pd.DataFrame(columns = col_names) -oa_licence - - -# In[28]: - - -# export des licences -sherpa['license'].value_counts() - - -# In[29]: - - -sherpa_licences = sherpa['license'].drop_duplicates() -sherpa_licences = sherpa_licences.dropna() -sherpa_licences - - -# In[30]: - - -oa_licence['sherpa_code'] = np.nan -oa_licence - - -# In[31]: - - -for code in sherpa_licences: - print (code) - oa_licence = oa_licence.append({'sherpa_code' : code}, ignore_index=True) - - -# In[32]: - - -oa_licence - - -# In[33]: - - -# convertir l'index en id -oa_licence = oa_licence.reset_index() -# ajout de l'id avec l'index + 1 -oa_licence['id'] = oa_licence['index'] + 1 -del oa_licence['index'] -oa_licence - - -# In[34]: - - -# ajout du nom et des URLs -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by', 'name'] = 'CC BY' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by', 'url'] = 'https://creativecommons.org/licenses/by/4.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_sa', 'name'] = 'CC BY-SA' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_sa', 'url'] = 'https://creativecommons.org/licenses/by-sa/4.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nc', 'name'] = 
'CC BY-NC' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nc', 'url'] = 'https://creativecommons.org/licenses/by-nc/4.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nc_sa', 'name'] = 'CC BY-NC-SA' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nc_sa', 'url'] = 'https://creativecommons.org/licenses/by-nc-sa/4.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nd', 'name'] = 'CC BY-ND' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nd', 'url'] = 'https://creativecommons.org/licenses/by-nd/4.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nc_nd', 'name'] = 'CC BY-NC-ND' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_by_nc_nd', 'url'] = 'https://creativecommons.org/licenses/by-nc-nd/4.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc0', 'name'] = 'CC0' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc0', 'url'] = 'https://creativecommons.org/publicdomain/zero/1.0/' -oa_licence.loc[oa_licence['sherpa_code'] == 'bespoke_license', 'name'] = 'Specific license' -oa_licence.loc[oa_licence['sherpa_code'] == 'bespoke_license', 'url'] = '' -oa_licence.loc[oa_licence['sherpa_code'] == 'all_rights_reserved', 'name'] = 'All rights reserved' -oa_licence.loc[oa_licence['sherpa_code'] == 'all_rights_reserved', 'url'] = '' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_gnu_gpl', 'name'] = 'GNU GPL' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_gnu_gpl', 'url'] = 'http://gnugpl.org/' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_public_domain', 'name'] = 'Public domain' -oa_licence.loc[oa_licence['sherpa_code'] == 'cc_public_domain', 'url'] = 'https://creativecommons.org/share-your-work/public-domain/' -# oa_licence.loc[oa_licence['sherpa_code'] == 'bespoke_license', 'url'] = 'https://port.sas.ac.uk/mod/book/view.php?id=1340&chapterid=1003' -oa_licence - - -# In[35]: - - -# ajout de la valeur UNKNOWN -oa_licence = oa_licence.append({'id' : 999999, 'sherpa_code' : '___', 'name' : 'UNKNOWN', 'url' : ''}, ignore_index=True) -oa_licence - - 
-# In[36]: - - -# ajout aux tables sherpa et rp -sherpa = sherpa.rename(columns = {'license' : 'sherpa_code'}) -sherpa - - -# In[37]: - - -# ajout aux tables sherpa et rp -rp = rp.rename(columns = {'license' : 'sherpa_code'}) -rp - - -# In[38]: - - -# merge -sherpa = pd.merge(sherpa, oa_licence[['sherpa_code', 'id']], on='sherpa_code', how='left') -sherpa - - -# In[39]: - - -sherpa = sherpa.rename(columns = {'id_x' : 'id', 'id_y' : 'licence'}) -sherpa - - -# In[40]: - - -# merge -rp = pd.merge(rp, oa_licence[['sherpa_code', 'id']], on='sherpa_code', how='left') -rp - - -# In[41]: - - -rp = rp.rename(columns = {'id' : 'licence'}) -rp - - -# In[42]: - - -# renommage des champs finaux -oa_licence_export = oa_licence[['id', 'name', 'url']] -oa_licence_export = oa_licence_export.rename(columns={'name' : 'name_or_abbrev', 'url' : 'website'}) - - -# In[43]: - - -# export de la table oa_licence -result = oa_licence_export.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/licence.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[44]: - - -# export csv -oa_licence_export.to_csv('sample/licence.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[45]: - - -# export excel -oa_licence_export.to_excel('sample/licence.xlsx', index=False) - - -# ## Table cost_factor_type - -# In[46]: - - -# creation du DF -col_names = ['id', - 'name' - ] -cost_factor_type = pd.DataFrame(columns = col_names) -cost_factor_type = cost_factor_type.append({'id' : 1, 'name' : 'APC'}, ignore_index=True) -cost_factor_type = cost_factor_type.append({'id' : 2, 'name' : 'Discount'}, ignore_index=True) -cost_factor_type = cost_factor_type.append({'id' : 3, 'name' : 'Refund'}, ignore_index=True) -cost_factor_type - - -# In[47]: - - -# ajout de la valeur UNKNOWN -cost_factor_type = cost_factor_type.append({'id' : 999999, 'name' : 'UNKNOWN'}, ignore_index=True) -cost_factor_type - - -# In[48]: - - -# export de 
la table -result = cost_factor_type.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/cost_factor_type.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[49]: - - -# export csv -cost_factor_type.to_csv('sample/cost_factor_type.tsv', sep='\t', encoding='utf-8', index=False) - - -# In[50]: - - -# export excel -cost_factor_type.to_excel('sample/cost_factor_type.xlsx', index=False) - - -# ## Table cost_factor - -# ### Ajout des données des APCs depuis DOAJ - -# In[51]: - - -# ajout de DOAJ info -doaj = pd.read_csv('doaj/journalcsv__doaj_20210312_0636_utf8.csv', encoding='utf-8', header=0) -doaj - - -# In[52]: - - -# garder les lignes avec APC -doaj_apc = doaj.loc[doaj['APC'] == 'Yes'][['Journal ISSN (print version)', 'Journal EISSN (online version)', 'APC amount']] -doaj_apc - - -# In[53]: - - -# garder les lignes avec APC no -doaj_apc_no = doaj.loc[doaj['APC'] == 'No'][['Journal ISSN (print version)', 'Journal EISSN (online version)']] -doaj_apc_no - - -# In[54]: - - -# attribuer la valeur 0 -doaj_apc_no['APC amount'] = 0 -doaj_apc_no - - -# In[55]: - - -# ajout à la table des APC -doaj_apc = doaj_apc.append(doaj_apc_no, ignore_index=True) -doaj_apc - - -# In[56]: - - -# découpage du prix en 'amount' et 'symbol' -doaj_apc[['amount', 'symbol']] = doaj_apc['APC amount'].str.split(' ', n=1, expand=True) -doaj_apc - - -# In[57]: - - -doaj_apc.loc[doaj_apc['APC amount'] == 0, 'amount'] = 0 -doaj_apc.loc[doaj_apc['APC amount'] == 0, 'symbol'] = '' -doaj_apc - - -# In[58]: - - -# ajouter les champs manquants -doaj_apc['cost_factor_type'] = 1 -doaj_apc['comment'] = 'Source: DOAJ' -doaj_apc - - -# In[59]: - - -# renommer les champs -doaj_apc = doaj_apc.rename(columns = {'Journal ISSN (print version)' : 'issn_print', 'Journal EISSN (online version)' : 'issn_electronic'}) -doaj_apc - - -# In[60]: - - -# ajout du issn -doaj_apc['issn'] = doaj_apc['issn_electronic'] -doaj_apc - - -# 
In[61]: - - -doaj_apc.loc[doaj_apc['issn'].isna()] - - -# In[62]: - - -# ajout du issnp quand c'est vide -doaj_apc.loc[doaj_apc['issn'].isna(), 'issn'] = doaj_apc['issn_print'] -doaj_apc.loc[doaj_apc['issn'].isna()] - - -# In[63]: - - -doaj_apc = pd.merge(doaj_apc, issns, on='issn', how='left') -doaj_apc - - -# In[64]: - - -# renommer les colonnes -doaj_apc = doaj_apc.rename(columns={'issnl' : 'issn_link'}) -doaj_apc - - -# ### Ajout des APCs depuis la base Journal Database (Zurich Open Repository and Archive) -# -# https://www.jdb.uzh.ch/ - -# In[65]: - - -# JDB base de Zurich -jdb = pd.read_csv('zora/jdb_apcs.tsv', encoding='utf-8', header=0, sep='\t') -jdb - - -# In[66]: - - -# renommer l'id -jdb = jdb.rename(columns = {'id' : 'jdb_id'}) -jdb - - -# In[67]: - - -# ajouter les champs manquants -jdb['cost_factor_type'] = 1 -jdb['comment'] = 'Source: JDB (' + jdb['apc_date'].astype(str) + ')' -jdb - - -# In[68]: - - -# renommer les champs -jdb = jdb.rename(columns = {'apc_fee' : 'amount', 'apc_currency' : 'symbol'}) -jdb - - -# In[69]: - - -jdb = jdb.drop_duplicates(subset='jdb_id', keep='last') - - -# In[70]: - - -# import openapc avec les valeurs max -openapc = pd.read_csv('openapc/open_apc_max.tsv', encoding='utf-8', header=0, sep='\t') -openapc - - -# In[71]: - - -# renommer les champs -openapc = openapc.rename(columns = {'period' : 'apc_date', 'issn_l' : 'issn_link', 'euro' : 'amount'}) -openapc - - -# In[72]: - - -# ajouter le lien avec le type et le symbole -openapc['cost_factor_type'] = 1 -openapc['jdb_id'] = np.nan -openapc['symbol'] = 'EUR' -openapc['comment'] = 'Source: OpenAPC (' + openapc['apc_date'].astype(str) + ')' -openapc - - -# In[73]: - - -# ajout des lignes de openapc -jdb = jdb.append(openapc, ignore_index=True) -jdb - - -# In[74]: - - -# supprimer les doublons par issnl et date -jdb = jdb.drop_duplicates(subset=['issn_link', 'apc_date'], keep='first') -jdb - - -# In[75]: - - -# ajout de DOAJ -cost_factor = doaj_apc.append(jdb, 
ignore_index=True) -cost_factor - - -# In[76]: - - -# test issnl -cost_factor.loc[cost_factor['issn_link'].isna()] - - -# In[77]: - - -# merge avec issnl -cost_factor = pd.merge(cost_factor, issns, on='issn', how='left') -cost_factor - - -# In[78]: - - -# test issnl -cost_factor.loc[cost_factor['issnl'].isna()] - - -# In[79]: - - -#ajout des issn quand ça manque -cost_factor.loc[cost_factor['issn'].isna(), 'issn'] = cost_factor['issn_print'] -cost_factor.loc[cost_factor['issn'].isna(), 'issn'] = cost_factor['issn_electronic'] -cost_factor.loc[cost_factor['issn'].isna(), 'issn'] = cost_factor['issn_link'] -cost_factor.loc[cost_factor['issn'].isna()] - - -# In[80]: - - -#ajout des issnl quand ça manque -cost_factor.loc[cost_factor['issnl'].isna(), 'issnl'] = cost_factor['issn_link'] -cost_factor.loc[cost_factor['issnl'].isna(), 'issnl'] = cost_factor['issn_print'] -cost_factor.loc[cost_factor['issnl'].isna(), 'issnl'] = cost_factor['issn_electronic'] -cost_factor.loc[cost_factor['issnl'].isna(), 'issnl'] = cost_factor['issn'] -cost_factor.loc[cost_factor['issnl'].isna()] - - -# In[81]: - - -# prendre les ids pour le merge -cost_factor_ids = cost_factor[['issn', 'issnl', 'cost_factor_type', 'amount', 'symbol', 'comment']] -# cost_factor_ids_1 = cost_factor_ids_1.rename(columns = {'issn_link' : 'issn'}) -# cost_factor_ids_2 = cost_factor.loc[cost_factor['issn_electronic'].notna()][['issn_electronic', 'cost_factor_type', 'amount', 'symbol', 'comment']] -# cost_factor_ids_2 = cost_factor_ids_2.rename(columns = {'issn_electronic' : 'issn'}) -# cost_factor_ids_3 = cost_factor.loc[cost_factor['issn_print'].notna()][['issn_print', 'cost_factor_type', 'amount', 'symbol', 'comment']] -# cost_factor_ids_3 = cost_factor_ids_3.rename(columns = {'issn_print' : 'issn'}) -# cost_factor_ids_4 = cost_factor.loc[cost_factor['issn'].notna()][['issn', 'cost_factor_type', 'amount', 'symbol', 'comment']] -# cost_factor_ids = cost_factor_ids_1.append(cost_factor_ids_2) -# cost_factor_ids = 
cost_factor_ids.append(cost_factor_ids_3) -# cost_factor_ids = cost_factor_ids.append(cost_factor_ids_4) -cost_factor_ids - - -# In[82]: - - -# supprimer les doublons et les vides -cost_factor_ids = cost_factor_ids.drop_duplicates(subset=['issnl']) -cost_factor_ids - - -# In[83]: - - -# merge dans l'autre sens pour garder que les lignes du fichier -cost_factor_ids = pd.merge(cost_factor_ids, sherpa[['id', 'issnl']], on='issnl', how='left') -cost_factor_ids - - -# In[84]: - - -# garder les lignes avec merge -cost_factor_ids_all = cost_factor_ids.loc[cost_factor_ids['id'].notnull()] -cost_factor_ids_all - - -# In[85]: - - -# supprimer les doublons -cost_factor_ids_all = cost_factor_ids_all.drop_duplicates(subset=['id']) -cost_factor_ids_all - - -# In[86]: - - -# supprimer les doublons par issnl -cost_factor_ids_all = cost_factor_ids_all.drop_duplicates(subset=['issnl']) -del cost_factor_ids_all['id'] -cost_factor_ids_all - - -# In[87]: - - -# convertir l'index en id -cost_factor_ids_all = cost_factor_ids_all.reset_index() -# ajout de l'id avec l'index + 1 -cost_factor_ids_all['cost_factor'] = cost_factor_ids_all['index'] + id_start -del cost_factor_ids_all['index'] -# convertir l'index en id -cost_factor_ids_all = cost_factor_ids_all.reset_index() -# ajout de l'id avec l'index + 1 -cost_factor_ids_all['cost_factor'] = cost_factor_ids_all['index'] + id_start -del cost_factor_ids_all['index'] -cost_factor_ids_all - - -# In[88]: - - -# merge avec la table sherpa -sherpa = pd.merge(sherpa, cost_factor_ids_all[['issnl', 'cost_factor']], on='issnl', how='left') -sherpa - - -# In[89]: - - -sherpa.loc[sherpa['cost_factor'].isna()] - - -# In[90]: - - -# garder les APCs pour la version published -sherpa.loc[sherpa['article_version'] != 'published', 'cost_factor'] = np.nan -sherpa.loc[sherpa['cost_factor'].notna()] - - -# In[91]: - - -# renommer l'id du fichier sherpa brut -# cost_factor_ids_all = cost_factor_ids_all.rename(columns = {'id' : 'id_sherpa'}) -cost_factor_ids_all = 
cost_factor_ids_all.rename(columns = {'cost_factor' : 'id'}) -cost_factor_ids_all - - -# In[92]: - - -cost_factor_ids_all['id'] = cost_factor_ids_all['id'].astype(int) - - -# In[93]: - - -cost_factor_ids_all - - -# In[94]: - - -cost_factor_export = cost_factor_ids_all[['id', 'cost_factor_type', 'amount', 'symbol', 'comment']] -cost_factor_export - - -# In[95]: - - -cost_factor_export.shape[0] - - -# In[96]: - - -# ajout de la valeur Rabais 100% pour les licences Read & Publish -rpid = cost_factor_export.shape[0] + 1 -cost_factor_export = cost_factor_export.append({'id' : rpid, 'cost_factor_type' : 2, 'amount' : 100, 'symbol' : '%', 'comment' : 'Read & Publish agreement'}, ignore_index=True) -cost_factor_export - - -# In[97]: - - -# ajout de l'id dans la table read & publish -rp['cost_factor'] = rpid -rp - - -# In[98]: - - -# ajout de la valeur UNKNOWN -cost_factor_export = cost_factor_export.append({'id' : 999999, 'cost_factor_type' : 999999, 'amount' : 0, 'symbol' : '', 'comment' : 'UNKNOWN'}, ignore_index=True) -cost_factor_export - - -# In[99]: - - -# export de la table -result = cost_factor_export.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/cost_factor.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[100]: - - -# export csv -cost_factor_export.to_csv('sample/cost_factor.tsv', index=False) - - -# In[101]: - - -# export excel -cost_factor_export.to_excel('sample/cost_factor.xlsx', index=False) - - -# ## Table term - -# In[102]: - - -sherpa - - -# In[103]: - - -# col_names = ['id', 'applicable_version', 'cost_factor', 'embargo', 'archiving'] -term_sherpa = sherpa[['id', 'version', 'cost_factor', 'embargo', 'archiving', 'locations_ir', 'locations_not_ir', 'licence', 'journal', 'conditions', 'public_notes', 'prerequisite_funders', 'prerequisite_funders_ror']] -term_sherpa - - -# In[104]: - - -# renommer les champs -term_sherpa = term_sherpa.rename(columns = {'id' 
: 'id_sherpa', 'embargo' : 'embargo_months', 'prerequisite_funders_ror' : 'ror'}) -term_sherpa - - -# In[105]: - - -# merge des champs dans le comment : conditions, public_notes, locations_not_ir -term_sherpa['conditions'] = term_sherpa['conditions'].fillna('') -term_sherpa['public_notes'] = term_sherpa['public_notes'].fillna('') -term_sherpa['locations_not_ir'] = term_sherpa['locations_not_ir'].fillna('') -term_sherpa['locations_ir'] = term_sherpa['locations_ir'].fillna('') -term_sherpa.loc[term_sherpa['locations_not_ir'] != '', 'locations_not_ir'] = 'Non institutional archiving locations: ' + term_sherpa['locations_not_ir'] -term_sherpa.loc[term_sherpa['locations_ir'] != '', 'locations_ir'] = 'Institutional archiving locations: ' + term_sherpa['locations_ir'] -term_sherpa.loc[term_sherpa['archiving'] == False, 'comment'] = term_sherpa['locations_not_ir'] -term_sherpa.loc[term_sherpa['archiving'] == True, 'comment'] = term_sherpa['locations_ir'] -term_sherpa.loc[term_sherpa['comment'] == '', 'comment'] = 'Conditions: ' + term_sherpa['conditions'] -term_sherpa.loc[(term_sherpa['comment'] != '') & (term_sherpa['conditions'] != ''), 'comment'] = term_sherpa['comment'] + ' ; Conditions: ' + term_sherpa['conditions'] -term_sherpa.loc[(term_sherpa['public_notes'] != '') & (term_sherpa['public_notes'] != term_sherpa['comment']), 'comment'] = term_sherpa['comment'] + ' ; Public notes: ' + term_sherpa['public_notes'] -term_sherpa.loc[(term_sherpa['public_notes'] != '') & (term_sherpa['comment'] == ''), 'comment'] = 'Public notes: ' + term_sherpa['public_notes'] -term_sherpa - - -# In[106]: - - -term_sherpa['prerequisite_funders'].value_counts() - - -# In[107]: - - -rp - - -# In[108]: - - -term_rp = rp[['rp_id', 'version', 'archiving', 'embargo_months', 'cost_factor', 'licence', 'journal', 'rp_publisher', 'ror', 'valid_from', 'valid_until']] -term_rp - - -# In[109]: - - -term_rp['rp_publisher'].value_counts() - - -# In[110]: - - -term_rp.loc[term_rp['rp_publisher'] == 
'Elsevier', 'comment'] = 'Elsevier Read & Publish agreement' -term_rp.loc[term_rp['rp_publisher'] == 'Wiley', 'comment'] = 'Wiley Read & Publish agreement' -term_rp.loc[term_rp['rp_publisher'] == 'TF', 'comment'] = 'Taylor and Francis Read & Publish agreement' -term_rp.loc[term_rp['rp_publisher'] == 'Springer Nature ', 'comment'] = 'Springer Nature Read & Publish agreement' -term_rp.loc[term_rp['rp_publisher'] == 'CUP', 'comment'] = 'Cambridge University Press (CUP) Read & Publish agreement. Article types covered: Research Articles, Review Articles, Rapid Communication, Brief Reports and Case Reports' -del term_rp['rp_publisher'] -term_rp - - -# In[111]: - - -# cocnat de deux tables -term_orig = term_sherpa[['id_sherpa', 'version', 'cost_factor', 'embargo_months', 'archiving', 'licence', 'journal', 'prerequisite_funders', 'ror', 'comment']] -term_orig - - -# In[112]: - - -term_orig = term_orig.append(term_rp, ignore_index=True, sort=False) -term_orig - - -# In[113]: - - -# ajout d'un hash unique pour chaque variante -term_orig['id_content_hash'] = term_orig.apply(lambda x: hash(tuple(x[['version', 'cost_factor', 'embargo_months', 'archiving', 'comment']])), axis = 1) -term_orig['id_content_hash_licence'] = term_orig.apply(lambda x: hash(tuple(x[['version', 'cost_factor', 'embargo_months', 'archiving', 'licence', 'comment']])), axis = 1) - - -# In[114]: - - -term_orig.sort_values(by='id_content_hash') - - -# In[115]: - - -# doublons -term_orig.loc[term_orig.duplicated(subset='id_content_hash')].sort_values(by='id_content_hash') - - -# In[116]: - - -term_orig['licence'] = term_orig['licence'].fillna(999999) -term_orig['licence'] = term_orig['licence'].astype(int) -term_orig['cost_factor'] = term_orig['cost_factor'].fillna(999999) -term_orig['cost_factor'] = term_orig['cost_factor'].astype(int) -# term_orig['embargo_months'] = term_orig['embargo_months'].fillna(0) -# term_orig['embargo_months'] = term_orig['embargo_months'].astype(int) 
-term_orig.loc[term_orig['archiving'] == True, 'ir_archiving'] = 1 -term_orig.loc[term_orig['archiving'] == False, 'ir_archiving'] = 0 -term_orig['ir_archiving'] = term_orig['ir_archiving'].fillna(0) -term_orig - - -# In[117]: - - -term_orig.loc[term_orig['ir_archiving'].isna()] - - -# In[118]: - - -term_orig['ir_archiving'].value_counts() - - -# In[119]: - - -term_orig['licence'] = term_orig['licence'].astype(int) -term_orig['ir_archiving'] = term_orig['ir_archiving'].astype(int) -term_orig['cost_factor'] = term_orig['cost_factor'].astype(int) -term_orig - - -# In[120]: - - -terms_export_dates = term_orig.loc[(term_orig['valid_from'].notna()) | (term_orig['valid_until'].notna())][['id_content_hash', 'ror', 'valid_from', 'valid_until']] -terms_export_dates - - -# In[121]: - - -terms_export = term_orig[['id_sherpa', 'rp_id', 'id_content_hash', 'id_content_hash_licence', 'version', 'cost_factor', 'embargo_months', 'ir_archiving', 'licence', 'comment']] -terms_export - - -# In[122]: - - -# test de doublons -terms_export.loc[terms_export.duplicated(subset='id_content_hash')].sort_values(by='id_content_hash') - - -# In[123]: - - -terms_export_dedup = terms_export.drop_duplicates(subset=['id_content_hash']) -terms_export_dedup - - -# In[124]: - - -terms_export_dedup_licence = terms_export.drop_duplicates(subset=['id_content_hash_licence']) -terms_export_dedup_licence - - -# In[125]: - - -# test de doublons -terms_export_dedup_licence.loc[terms_export_dedup_licence.duplicated(subset='id_content_hash')].sort_values(by='id_content_hash') - - -# In[126]: - - -# totaux pour les deux sources -terms_export_dedup.loc[terms_export_dedup['id_sherpa'].notna()].shape[0] - - -# In[127]: - - -terms_export_dedup.loc[terms_export_dedup['rp_id'].notna()].shape[0] - - -# In[128]: - - -terms_export_dedup.loc[terms_export_dedup['rp_id'].notna()] - - -# In[129]: - - -# convertir l'index en id -terms_export_dedup.reset_index(inplace=True) -del terms_export_dedup['index'] -terms_export_dedup - 
- -# In[130]: - - -# ajout de l'id avec l'index + 1 -terms_export_dedup['id'] = terms_export_dedup.index + 1 -# del terms_export_dedup['index'] -terms_export_dedup - - -# In[131]: - - -terms_export_dedup['source'] = '' -terms_export_dedup - - -# In[132]: - - -# grouper par licence -terms_export_dedup_licences = terms_export_dedup_licence[['licence', 'id_content_hash']] -terms_export_dedup_licences - - -# In[133]: - - -# concat valeurs avec même id -terms_export_dedup_licences['licence'] = terms_export_dedup_licences['licence'].astype(str) -terms_export_dedup_licences = terms_export_dedup_licences.groupby('id_content_hash').agg({'licence': lambda x: ', '.join(x)}) -terms_export_dedup_licences - - -# In[134]: - - -# test des valeur multiples -terms_export_dedup_licences.loc[terms_export_dedup_licences['licence'].str.contains(',')] - - -# In[135]: - - -# ajout des licences groupées -terms_export_dedup_fin = pd.merge(terms_export_dedup, terms_export_dedup_licences, on='id_content_hash', how='left') -terms_export_dedup_fin - - -# In[136]: - - -# merge avec les dates pour avoir les terms ids -terms_export_dates = pd.merge(terms_export_dates, terms_export_dedup_fin[['id_content_hash', 'id']], on='id_content_hash') -terms_export_dates = terms_export_dates.rename(columns = {'id' : 'term'}) -terms_export_dates - - -# In[137]: - - -# renommer les champs de licence -del terms_export_dedup_fin['licence_x'] -terms_export_dedup_fin = terms_export_dedup_fin.rename(columns = {'licence_y' : 'licence'}) - - -# In[138]: - - -terms_export_fin = terms_export_dedup_fin[['version', 'cost_factor', 'embargo_months', 'ir_archiving', 'licence', 'comment', 'id', 'source']] -terms_export_fin - - -# In[139]: - - -# export de la table -result = terms_export_fin.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/term.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[140]: - - -# export csv 
-terms_export_fin.to_csv('sample/term.tsv', index=False) - - -# In[141]: - - -# export excel -terms_export_fin.to_excel('sample/term.xlsx', index=False) - - -# ## Table condition_type - -# In[142]: - - -# Journal-only, Organization-only, Journal-organization agreement -col_names = ['id', - 'condition_issuer' - ] -condition_type = pd.DataFrame(columns = col_names) -condition_type = condition_type.append({'id' : 1, 'condition_issuer' : 'Journal-only'}, ignore_index=True) -condition_type = condition_type.append({'id' : 2, 'condition_issuer' : 'Organization-only'}, ignore_index=True) -condition_type = condition_type.append({'id' : 3, 'condition_issuer' : 'Journal-organization agreement'}, ignore_index=True) -condition_type - - -# In[143]: - - -# export de la table -result = condition_type.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/condition_type.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[144]: - - -# export csv -condition_type.to_csv('sample/condition_type.tsv', index=False) - - -# In[145]: - - -# export excel -condition_type.to_excel('sample/condition_type.xlsx', index=False) - - -# ## Table organization - -# In[146]: - - -# extraction des organizations (funders) -sherpa - - -# In[147]: - - -sherpa.loc[sherpa['prerequisite_funders'].notna()] - - -# In[148]: - - -sherpa['prerequisite_funders'].value_counts() - - -# In[149]: - - -funders = sherpa.loc[sherpa['prerequisite_funders'].notna()][['prerequisite_funders_name', 'prerequisite_funders_fundref', 'prerequisite_funders_ror', 'prerequisite_funders_country', 'prerequisite_funders_url', 'prerequisite_funders_sherpa_id']] -funders - - -# In[150]: - - -funders_dedup = funders.drop_duplicates(subset='prerequisite_funders_ror') -funders_dedup - - -# In[151]: - - -funders_dedup.shape[0] - - -# In[152]: - - -# export excel -funders_dedup.to_excel('sample/funders.xlsx', index=False) - - -# In[153]: - - -# export csv 
-funders_dedup.to_csv('sample/funders.tsv', index=False) - - -# In[154]: - - -# creation du DF -organization_funders = funders_dedup -organization_funders = organization_funders.rename(columns = {'prerequisite_funders_name' : 'name', - 'prerequisite_funders_fundref' : 'fundref', - 'prerequisite_funders_ror' : 'ror', - 'prerequisite_funders_country' : 'iso_code', - 'prerequisite_funders_url' : 'website', - 'prerequisite_funders_sherpa_id' : 'sherpa_id' - }) -organization_funders - - -# In[155]: - - -# lien avec les pays -country = pd.read_csv('sample/country.tsv', encoding='utf-8', header=0, sep='\t') -country - - -# In[156]: - - -# merge avec les pays -organization_funders['iso_code'] = organization_funders['iso_code'].str.upper() -organization_funders['is_funder'] = 1 -organization_funders = pd.merge(organization_funders, country[['iso_code', 'id']], how='left', on='iso_code') -organization_funders - - -# In[157]: - - -organization_funders = organization_funders.rename(columns = {'id' : 'country'}) -organization_funders - - -# In[158]: - - -# ajout des organizations suisses -organization = pd.read_csv('ror/ror_ch_hei_export.tsv', encoding='utf-8', header=0, sep='\t', dtype={'fundref': str, 'orgref': str}, na_filter=False) -organization - - -# In[159]: - - -# tri par nom -organization = organization.sort_values(by='name') -organization - - -# In[160]: - - -organization = organization.reset_index(drop=True) -organization - - -# In[161]: - - -# mettre l'EPFL en position 1 et UNIGE en 2 -target_row = 32 -# Move target row to first element of list. -idx = [target_row] + [i for i in range(len(organization)) if i != target_row] -organization = organization.iloc[idx] -organization - - -# In[162]: - - -organization = organization.reset_index(drop=True) -organization - - -# In[163]: - - -# mettre l'EPFL en position 1 et UNIGE en 2 -target_row = 45 -# Move target row to first element of list. 
-idx = [target_row] + [i for i in range(len(organization)) if i != target_row] -organization = organization.iloc[idx] -organization - - -# In[164]: - - -organization = organization.reset_index(drop=True) -organization - - -# In[165]: - - -# ajout des funders -organization = organization.append(organization_funders, ignore_index=True) -organization - - -# In[166]: - - -# remplacement dans le fundref id qui renvoie vers du JSON seulement -# URL actuel : http://data.crossref.org/fundingdata/funder/10.13039/[fundref id] -# ex : http://dx.doi.org/10.13039/501100007903 -# redirigé sur : http://data.crossref.org/fundingdata/funder/10.13039/501100007903 -# URL des publications financées : https://search.crossref.org/funding?q=[fundref id]&from_ui=yes -# ex : https://search.crossref.org/funding?q=501100003006&from_ui=yes -organization['fundref'] = organization['fundref'].str.replace('http://dx.doi.org/10.13039/', '') -organization - - -# In[167]: - - -# df pour l'export -organization_export = organization[['name', 'website', 'country', 'starting_year', 'is_funder', 'ror', 'fundref']] -organization_export - - -# In[168]: - - -# ajout des valeurs vides -organization_export['starting_year'] = organization_export['starting_year'].fillna(0) -organization_export['fundref'] = organization_export['fundref'].fillna('') -organization_export['ror'] = organization_export['ror'].fillna('') -organization_export - - -# In[169]: - - -# ajout de l'id avec l'index + 1 -organization_export['id'] = organization_export.index + 1 -# del terms_export_dedup['index'] -organization_export - - -# In[170]: - - -# export de la table -result = organization_export.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/organization.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[171]: - - -# export excel -organization_export.to_excel('sample/organization.xlsx', index=False) - - -# In[172]: - - -# export csv 
-organization_export.to_csv('sample/organization.tsv', index=False) - - -# ## Table condition_set_term - -# In[173]: - - -term_orig - - -# In[174]: - - -terms_export_dedup - - -# In[175]: - - -# merge des terms id -term_orig = pd.merge(term_orig, terms_export_dedup[['id_content_hash', 'id']], on='id_content_hash', how='left') -term_orig - - -# In[176]: - - -term_orig = term_orig.rename(columns = {'id' : 'term'}) -term_orig - - -# In[177]: - - -condition_type - - -# In[178]: - - -# merge des condition type -term_orig['condition_type'] = 3 -term_orig.loc[term_orig['ror'].isna(), 'condition_type'] = 1 -term_orig - - -# In[179]: - - -organization_export - - -# In[180]: - - -# merge des organizations -term_orig = pd.merge(term_orig, organization_export[['ror', 'id']], on='ror', how='left') -term_orig - - -# In[181]: - - -term_orig = term_orig.rename(columns = {'id' : 'organization'}) -term_orig - - -# In[182]: - - -# concat valeurs avec même id -condition_set_term_dedup_terms = term_orig[['term', 'id_content_hash']] -condition_set_term_dedup_terms_dedup = condition_set_term_dedup_terms.drop_duplicates() -condition_set_term_dedup_terms_dedup = condition_set_term_dedup_terms_dedup.loc[condition_set_term_dedup_terms_dedup['term'].notna()] -condition_set_term_dedup_terms_dedup['term'] = condition_set_term_dedup_terms_dedup['term'].astype(int) -condition_set_term_dedup_terms_dedup['term'] = condition_set_term_dedup_terms_dedup['term'].astype(str) -condition_set_term_dedup_terms_dedup = condition_set_term_dedup_terms_dedup.groupby('id_content_hash').agg({'term': lambda x: ', '.join(x)}) -condition_set_term_dedup_terms_dedup - - -# In[183]: - - -# concat valeurs avec même id -condition_set_term_dedup_journals = term_orig[['journal', 'id_content_hash']] -condition_set_term_dedup_journals_dedup = condition_set_term_dedup_journals.drop_duplicates() -condition_set_term_dedup_journals_dedup = 
condition_set_term_dedup_journals_dedup.loc[condition_set_term_dedup_journals_dedup['journal'].notna()] -condition_set_term_dedup_journals_dedup['journal'] = condition_set_term_dedup_journals_dedup['journal'].astype(int) -condition_set_term_dedup_journals_dedup['journal'] = condition_set_term_dedup_journals_dedup['journal'].astype(str) -condition_set_term_dedup_journals_dedup = condition_set_term_dedup_journals_dedup.groupby('id_content_hash').agg({'journal': lambda x: ', '.join(x)}) -condition_set_term_dedup_journals_dedup - - -# In[184]: - - -# concat valeurs avec même id -condition_set_term_dedup_organizations = term_orig[['organization', 'id_content_hash']] -condition_set_term_dedup_organizations_dedup = condition_set_term_dedup_organizations.drop_duplicates() -condition_set_term_dedup_organizations_dedup = condition_set_term_dedup_organizations_dedup.loc[condition_set_term_dedup_organizations_dedup['organization'].notna()] -condition_set_term_dedup_organizations_dedup['organization'] = condition_set_term_dedup_organizations_dedup['organization'].astype(int) -condition_set_term_dedup_organizations_dedup['organization'] = condition_set_term_dedup_organizations_dedup['organization'].astype(str) -condition_set_term_dedup_organizations_dedup = condition_set_term_dedup_organizations_dedup.groupby('id_content_hash').agg({'organization': lambda x: ', '.join(x)}) -condition_set_term_dedup_organizations_dedup - - -# In[185]: - - -# concat valeurs avec même id : pas possible pour condition_type -condition_set_term_dedup_condition_types = term_orig[['condition_type', 'id_content_hash']] -condition_set_term_dedup_condition_types_dedup = condition_set_term_dedup_condition_types.drop_duplicates() -condition_set_term_dedup_condition_types_dedup = condition_set_term_dedup_condition_types_dedup.loc[condition_set_term_dedup_condition_types_dedup['condition_type'].notna()] -# condition_set_term_dedup_condition_types_dedup['condition_type'] = 
condition_set_term_dedup_condition_types_dedup['condition_type'].astype(int) -# condition_set_term_dedup_condition_types_dedup['condition_type'] = condition_set_term_dedup_condition_types_dedup['condition_type'].astype(str) -# condition_set_term_dedup_condition_types_dedup = condition_set_term_dedup_condition_types_dedup.groupby('id_content_hash').agg({'condition_type': lambda x: ', '.join(x)}) -condition_set_term_dedup_condition_types_dedup - - -# In[186]: - - -# recuperation des ids groupés -terms_export_dedup = pd.merge(terms_export_dedup, condition_set_term_dedup_terms_dedup, on='id_content_hash', how='left') -terms_export_dedup = pd.merge(terms_export_dedup, condition_set_term_dedup_journals_dedup, on='id_content_hash', how='left') -terms_export_dedup = pd.merge(terms_export_dedup, condition_set_term_dedup_organizations_dedup, on='id_content_hash', how='left') -terms_export_dedup = pd.merge(terms_export_dedup, condition_set_term_dedup_condition_types_dedup, on='id_content_hash', how='left') -terms_export_dedup - - -# In[187]: - - -condition_sets_orig = terms_export_dedup[['term', 'condition_type', 'organization', 'journal']] -condition_sets_orig - - -# In[188]: - - -# ajout d'un hash unique pour chaque variante -condition_sets_orig['id_term_hash'] = condition_sets_orig.apply(lambda x: hash(tuple(x[['condition_type', 'organization', 'journal']])), axis = 1) -condition_sets_orig - - -# In[189]: - - -# grouper les termes qui ont les mêmes valeurs pour le reste -condition_sets_orig_terms = condition_sets_orig[['term', 'id_term_hash']] -condition_sets_orig_terms_dedup = condition_sets_orig_terms.drop_duplicates() -condition_sets_orig_terms_dedup = condition_sets_orig_terms_dedup.loc[condition_sets_orig_terms_dedup['term'].notna()] -condition_sets_orig_terms_dedup['term'] = condition_sets_orig_terms_dedup['term'].astype(int) -condition_sets_orig_terms_dedup['term'] = condition_sets_orig_terms_dedup['term'].astype(str) -condition_sets_orig_terms_dedup = 
condition_sets_orig_terms_dedup.groupby('id_term_hash').agg({'term': lambda x: ', '.join(x)}) -condition_sets_orig_terms_dedup - - -# In[190]: - - -# ajout des ids groupées -condition_sets_orig_terms = pd.merge(condition_sets_orig, condition_sets_orig_terms_dedup, on='id_term_hash', how='left') -condition_sets_orig_terms - - -# In[191]: - - -# rename terms -del condition_sets_orig_terms['term_x'] -condition_sets_orig_terms = condition_sets_orig_terms.rename(columns = {'term_y' : 'term'}) -condition_sets_orig_terms - - -# In[192]: - - -# test duplicates -condition_sets_orig_terms.loc[condition_sets_orig_terms.duplicated()].sort_values(by='term') - - -# In[193]: - - -condition_sets_orig_terms.loc[condition_sets_orig_terms.duplicated()].shape[0] - - -# In[194]: - - -condition_sets_orig_terms_dedup = condition_sets_orig_terms.drop_duplicates() -condition_sets_orig_terms_dedup - - -# In[195]: - - -# ajout des champs manquants -condition_sets_orig_terms_dedup['comment'] = '' - - -# In[196]: - - -# remplacement des "nan" -condition_sets_orig_terms_dedup.loc[condition_sets_orig_terms_dedup['journal'].isna()] - - -# In[197]: - - -# remplacement des "nan" -condition_sets_orig_terms_dedup.loc[condition_sets_orig_terms_dedup['term'].isna()] - - -# In[198]: - - -# remplacement des "nan" -condition_sets_orig_terms_dedup.loc[condition_sets_orig_terms_dedup['condition_type'].isna()] - - -# In[199]: - - -# remplacement des "nan" -condition_sets_orig_terms_dedup.loc[condition_sets_orig_terms_dedup['organization'].isna()] - - -# In[200]: - - -# remplacement des "nan" -condition_sets_orig_terms_dedup['organization'] = condition_sets_orig_terms_dedup['organization'].fillna('') -condition_sets_orig_terms_dedup - - -# In[201]: - - -# convertir l'index en id -condition_sets_orig_terms_dedup = condition_sets_orig_terms_dedup.reset_index() -# ajout de l'id avec l'index + 1 -condition_sets_orig_terms_dedup['id'] = condition_sets_orig_terms_dedup['index'] + 1 -del 
condition_sets_orig_terms_dedup['index'] -condition_sets_orig_terms_dedup - - -# In[202]: - - -# convertir l'index en id -condition_sets_orig_terms_dedup = condition_sets_orig_terms_dedup.reset_index() -# ajout de l'id avec l'index + 1 -condition_sets_orig_terms_dedup['id'] = condition_sets_orig_terms_dedup['index'] + 1 -del condition_sets_orig_terms_dedup['index'] -condition_sets_orig_terms_dedup - - -# In[203]: - - -# export de la table -result = condition_sets_orig_terms_dedup[['id', 'condition_type', 'organization', 'journal', 'term', 'comment']].to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/condition_set.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[204]: - - -# export excel -condition_sets_orig_terms_dedup[['id', 'condition_type', 'organization', 'journal', 'term', 'comment']].to_excel('sample/condition_set.xlsx', index=False) - - -# In[205]: - - -# export csv -condition_sets_orig_terms_dedup[['id', 'condition_type', 'organization', 'journal', 'term', 'comment']].to_csv('sample/condition_set.tsv', index=False) - - -# ## Table organization_condition_set - -# In[206]: - - -condition_sets_orig_terms_dedup - - -# In[207]: - - -condition_sets_orig_terms_dedup.loc[(condition_sets_orig_terms_dedup['organization'].notna()) & (condition_sets_orig_terms_dedup['organization'] != '')] - - -# In[208]: - - -# creation du DF -# col_names = ['id', -# 'organization', -# 'condition_set', -# 'valid_from', -# 'valid_until' -# ] -# organization_condition = pd.DataFrame(columns = col_names) -organization_condition = condition_sets_orig_terms_dedup.loc[(condition_sets_orig_terms_dedup['organization'].notna()) & (condition_sets_orig_terms_dedup['organization'] != '')][['id', 'organization', 'term']] -organization_condition - - -# In[209]: - - -# extraction des terms ids -organization_condition_split = organization_condition.assign(term = 
organization_condition.term.str.split(',')).explode('term') -organization_condition_split - - -# In[210]: - - -organization_condition_split.loc[organization_condition_split['organization'].isna()] - - -# In[211]: - - -organization_condition_split.loc[organization_condition_split['term'].isna()] - - -# In[212]: - - -organization_condition_split['term'] = organization_condition_split['term'].astype(int) -organization_condition_split - - -# In[213]: - - -# ajout du ROR -terms_export_dates - - -# In[214]: - - -# merge pour obtenir les dates -organization_condition_split = pd.merge(organization_condition_split, terms_export_dates[['term', 'valid_from', 'valid_until']], on='term', how='left') -organization_condition_split - - -# In[215]: - - -# dédoublonage -organization_condition_split_dedup = organization_condition_split.drop_duplicates() -organization_condition_split_dedup - - -# In[216]: - - -organization_condition = pd.merge(organization_condition, organization_condition_split_dedup[['id', 'valid_from', 'valid_until']], on='id', how='left') -organization_condition - - -# In[217]: - - -organization_condition = organization_condition.rename(columns = {'id' : 'condition_set'}) -organization_condition['valid_from'] = organization_condition['valid_from'].fillna('') -organization_condition['valid_until'] = organization_condition['valid_until'].fillna('') -organization_condition - - -# In[218]: - - -# split final pour avoir une ligne par organization -organization_condition_fin = organization_condition.assign(organization = organization_condition.organization.str.split(',')).explode('organization') -organization_condition_fin - - -# In[219]: - - -# ajout de l'id avec l'index + 1 -organization_condition_fin = organization_condition_fin.reset_index() -organization_condition_fin['id'] = organization_condition_fin.index + 1 -del organization_condition_fin['index'] -organization_condition_fin - - -# In[220]: - - -# export de la table -result = organization_condition_fin[['id', 
'condition_set', 'organization', 'valid_from', 'valid_until']].to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/organization_condition.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, ensure_ascii=False) - - -# In[221]: - - -# export excel -organization_condition_fin[['id', 'condition_set', 'organization', 'valid_from', 'valid_until']].to_excel('sample/organization_condition.xlsx', index=False) - - -# In[222]: - - -# export csv -organization_condition_fin[['id', 'condition_set', 'organization', 'valid_from', 'valid_until']].to_csv('sample/organization_condition.tsv', index=False) - - -# ## Table journal_condition_set - -# In[223]: - - -# creation du DF -# col_names = ['id', -# 'journal', -# 'condition_set', -# 'valid_from', -# 'valid_until' -# ] -# journal_condition = pd.DataFrame(columns = col_names) -journal_condition = condition_sets_orig_terms_dedup.loc[(condition_sets_orig_terms_dedup['journal'].notna()) & (condition_sets_orig_terms_dedup['journal'] != '')][['id', 'journal']] -journal_condition - - -# In[224]: - - -journal_condition = journal_condition.rename(columns = {'id' : 'condition_set'}) -journal_condition['valid_from'] = '' -journal_condition['valid_until'] = '' -journal_condition - - -# In[225]: - - -# split final pour avoir une ligne par journal -journal_condition_fin = journal_condition.assign(journal = journal_condition.journal.str.split(',')).explode('journal') -journal_condition_fin - - -# In[226]: - - -# ajout de l'id avec l'index + 1 -journal_condition_fin = journal_condition_fin.reset_index() -journal_condition_fin['id'] = journal_condition_fin.index + 1 -del journal_condition_fin['index'] -journal_condition_fin - - -# In[227]: - - -# export de la table -result = journal_condition_fin.to_json(orient='records', force_ascii=False) -parsed = json.loads(result) -with open('sample/journal_condition.json', 'w', encoding='utf-8') as file: - json.dump(parsed, file, indent=2, 
ensure_ascii=False) - - -# In[228]: - - -# export excel -journal_condition_fin.to_excel('sample/journal_condition.xlsx', index=False) - - -# In[229]: - - -# export csv -journal_condition_fin.to_csv('sample/journal_condition.tsv', index=False) - - -# In[ ]: - - - - diff --git a/import_scripts/99_oacct_import.md b/import_scripts/99_oacct_import.md deleted file mode 100644 index 75df7e4f..00000000 --- a/import_scripts/99_oacct_import.md +++ /dev/null @@ -1,212 +0,0 @@ -# Projet Open Access Compliance Check Tool (OACCT) - -Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 - -Ce notebook permet d'importer les données en utilisant l'API : - -https://oacct-test.epfl.ch/api/ - -Exemple avec Journals : - -https://oacct-test.epfl.ch/api/journal/ - -GET /api/journal/ - -HTTP 200 OK -Allow: GET, POST, HEAD, OPTIONS -Content-Type: application/json -Vary: Accept - -[] - -Media type: application/json - -Content: -``` json -{ - "issn": [], - "name": "", - "name_short_iso_4": "", - "website": "", - "oa_options": "", - "starting_year": null, - "end_year": null, - "doaj_seal": false, - "doaj_status": false, - "lockss": false, - "nlch": false, - "portico": false, - "qoam_av_score": null -} -``` - - - -```python -import json -import requests -import codecs -oacct_login = 'oacct_test' -oacct_pwd = '2f4dBRhyj7' -headers = {'accept': 'application/json'} -``` - - -```python -# test sans authentifications -url = 'https://oacct-test.epfl.ch/api/country/' -r = requests.get(url) -print(r) -``` - - - - - -```python -print(r.text) -``` - - [{"id":1,"name":"Afghanistan","iso_code":"AF"},{"id":249,"name":"Åland Islands","iso_code":"AX"},{"id":2,"name":"Albania","iso_code":"AL"},{"id":3,"name":"Algeria","iso_code":"DZ"},{"id":4,"name":"American 
Samoa","iso_code":"AS"},{"id":5,"name":"Andorra","iso_code":"AD"},{"id":6,"name":"Angola","iso_code":"AO"},{"id":7,"name":"Anguilla","iso_code":"AI"},{"id":8,"name":"Antarctica","iso_code":"AQ"},{"id":9,"name":"Antigua and Barbuda","iso_code":"AG"},{"id":10,"name":"Argentina","iso_code":"AR"},{"id":11,"name":"Armenia","iso_code":"AM"},{"id":12,"name":"Aruba","iso_code":"AW"},{"id":13,"name":"Australia","iso_code":"AU"},{"id":14,"name":"Austria","iso_code":"AT"},{"id":15,"name":"Azerbaijan","iso_code":"AZ"},{"id":16,"name":"Bahamas (the)","iso_code":"BS"},{"id":17,"name":"Bahrain","iso_code":"BH"},{"id":18,"name":"Bangladesh","iso_code":"BD"},{"id":19,"name":"Barbados","iso_code":"BB"},{"id":20,"name":"Belarus","iso_code":"BY"},{"id":21,"name":"Belgium","iso_code":"BE"},{"id":22,"name":"Belize","iso_code":"BZ"},{"id":23,"name":"Benin","iso_code":"BJ"},{"id":24,"name":"Bermuda","iso_code":"BM"},{"id":25,"name":"Bhutan","iso_code":"BT"},{"id":26,"name":"Bolivia (Plurinational State of)","iso_code":"BO"},{"id":27,"name":"Bonaire, Sint Eustatius and Saba","iso_code":"BQ"},{"id":28,"name":"Bosnia and Herzegovina","iso_code":"BA"},{"id":29,"name":"Botswana","iso_code":"BW"},{"id":30,"name":"Bouvet Island","iso_code":"BV"},{"id":31,"name":"Brazil","iso_code":"BR"},{"id":32,"name":"British Indian Ocean Territory (the)","iso_code":"IO"},{"id":33,"name":"Brunei Darussalam","iso_code":"BN"},{"id":34,"name":"Bulgaria","iso_code":"BG"},{"id":35,"name":"Burkina Faso","iso_code":"BF"},{"id":36,"name":"Burundi","iso_code":"BI"},{"id":37,"name":"Cabo Verde","iso_code":"CV"},{"id":38,"name":"Cambodia","iso_code":"KH"},{"id":39,"name":"Cameroon","iso_code":"CM"},{"id":40,"name":"Canada","iso_code":"CA"},{"id":41,"name":"Cayman Islands (the)","iso_code":"KY"},{"id":42,"name":"Central African Republic (the)","iso_code":"CF"},{"id":43,"name":"Chad","iso_code":"TD"},{"id":44,"name":"Chile","iso_code":"CL"},{"id":45,"name":"China","iso_code":"CN"},{"id":46,"name":"Christmas 
Island","iso_code":"CX"},{"id":47,"name":"Cocos (Keeling) Islands (the)","iso_code":"CC"},{"id":48,"name":"Colombia","iso_code":"CO"},{"id":49,"name":"Comoros (the)","iso_code":"KM"},{"id":50,"name":"Congo (the Democratic Republic of the)","iso_code":"CD"},{"id":51,"name":"Congo (the)","iso_code":"CG"},{"id":52,"name":"Cook Islands (the)","iso_code":"CK"},{"id":53,"name":"Costa Rica","iso_code":"CR"},{"id":59,"name":"Côte d'Ivoire","iso_code":"CI"},{"id":54,"name":"Croatia","iso_code":"HR"},{"id":55,"name":"Cuba","iso_code":"CU"},{"id":56,"name":"Curaçao","iso_code":"CW"},{"id":57,"name":"Cyprus","iso_code":"CY"},{"id":58,"name":"Czechia","iso_code":"CZ"},{"id":60,"name":"Denmark","iso_code":"DK"},{"id":61,"name":"Djibouti","iso_code":"DJ"},{"id":62,"name":"Dominica","iso_code":"DM"},{"id":63,"name":"Dominican Republic (the)","iso_code":"DO"},{"id":64,"name":"Ecuador","iso_code":"EC"},{"id":65,"name":"Egypt","iso_code":"EG"},{"id":66,"name":"El Salvador","iso_code":"SV"},{"id":67,"name":"Equatorial Guinea","iso_code":"GQ"},{"id":68,"name":"Eritrea","iso_code":"ER"},{"id":69,"name":"Estonia","iso_code":"EE"},{"id":70,"name":"Eswatini","iso_code":"SZ"},{"id":71,"name":"Ethiopia","iso_code":"ET"},{"id":72,"name":"Falkland Islands (the) [Malvinas]","iso_code":"FK"},{"id":73,"name":"Faroe Islands (the)","iso_code":"FO"},{"id":74,"name":"Fiji","iso_code":"FJ"},{"id":75,"name":"Finland","iso_code":"FI"},{"id":76,"name":"France","iso_code":"FR"},{"id":77,"name":"French Guiana","iso_code":"GF"},{"id":78,"name":"French Polynesia","iso_code":"PF"},{"id":79,"name":"French Southern Territories (the)","iso_code":"TF"},{"id":80,"name":"Gabon","iso_code":"GA"},{"id":81,"name":"Gambia 
(the)","iso_code":"GM"},{"id":82,"name":"Georgia","iso_code":"GE"},{"id":83,"name":"Germany","iso_code":"DE"},{"id":84,"name":"Ghana","iso_code":"GH"},{"id":85,"name":"Gibraltar","iso_code":"GI"},{"id":86,"name":"Greece","iso_code":"GR"},{"id":87,"name":"Greenland","iso_code":"GL"},{"id":88,"name":"Grenada","iso_code":"GD"},{"id":89,"name":"Guadeloupe","iso_code":"GP"},{"id":90,"name":"Guam","iso_code":"GU"},{"id":91,"name":"Guatemala","iso_code":"GT"},{"id":92,"name":"Guernsey","iso_code":"GG"},{"id":93,"name":"Guinea","iso_code":"GN"},{"id":94,"name":"Guinea-Bissau","iso_code":"GW"},{"id":95,"name":"Guyana","iso_code":"GY"},{"id":96,"name":"Haiti","iso_code":"HT"},{"id":97,"name":"Heard Island and McDonald Islands","iso_code":"HM"},{"id":98,"name":"Holy See (the)","iso_code":"VA"},{"id":99,"name":"Honduras","iso_code":"HN"},{"id":100,"name":"Hong Kong","iso_code":"HK"},{"id":101,"name":"Hungary","iso_code":"HU"},{"id":102,"name":"Iceland","iso_code":"IS"},{"id":103,"name":"India","iso_code":"IN"},{"id":104,"name":"Indonesia","iso_code":"ID"},{"id":250,"name":"International Agency","iso_code":"OI"},{"id":105,"name":"Iran (Islamic Republic of)","iso_code":"IR"},{"id":106,"name":"Iraq","iso_code":"IQ"},{"id":107,"name":"Ireland","iso_code":"IE"},{"id":108,"name":"Isle of Man","iso_code":"IM"},{"id":109,"name":"Israel","iso_code":"IL"},{"id":110,"name":"Italy","iso_code":"IT"},{"id":111,"name":"Jamaica","iso_code":"JM"},{"id":112,"name":"Japan","iso_code":"JP"},{"id":113,"name":"Jersey","iso_code":"JE"},{"id":114,"name":"Jordan","iso_code":"JO"},{"id":115,"name":"Kazakhstan","iso_code":"KZ"},{"id":116,"name":"Kenya","iso_code":"KE"},{"id":117,"name":"Kiribati","iso_code":"KI"},{"id":118,"name":"Korea (the Democratic People's Republic of)","iso_code":"KP"},{"id":119,"name":"Korea (the Republic of)","iso_code":"KR"},{"id":120,"name":"Kuwait","iso_code":"KW"},{"id":121,"name":"Kyrgyzstan","iso_code":"KG"},{"id":122,"name":"Lao People's Democratic Republic 
(the)","iso_code":"LA"},{"id":123,"name":"Latvia","iso_code":"LV"},{"id":124,"name":"Lebanon","iso_code":"LB"},{"id":125,"name":"Lesotho","iso_code":"LS"},{"id":126,"name":"Liberia","iso_code":"LR"},{"id":127,"name":"Libya","iso_code":"LY"},{"id":128,"name":"Liechtenstein","iso_code":"LI"},{"id":129,"name":"Lithuania","iso_code":"LT"},{"id":130,"name":"Luxembourg","iso_code":"LU"},{"id":131,"name":"Macao","iso_code":"MO"},{"id":132,"name":"Madagascar","iso_code":"MG"},{"id":133,"name":"Malawi","iso_code":"MW"},{"id":134,"name":"Malaysia","iso_code":"MY"},{"id":135,"name":"Maldives","iso_code":"MV"},{"id":136,"name":"Mali","iso_code":"ML"},{"id":137,"name":"Malta","iso_code":"MT"},{"id":138,"name":"Marshall Islands (the)","iso_code":"MH"},{"id":139,"name":"Martinique","iso_code":"MQ"},{"id":140,"name":"Mauritania","iso_code":"MR"},{"id":141,"name":"Mauritius","iso_code":"MU"},{"id":142,"name":"Mayotte","iso_code":"YT"},{"id":143,"name":"Mexico","iso_code":"MX"},{"id":144,"name":"Micronesia (Federated States of)","iso_code":"FM"},{"id":145,"name":"Moldova (the Republic of)","iso_code":"MD"},{"id":146,"name":"Monaco","iso_code":"MC"},{"id":147,"name":"Mongolia","iso_code":"MN"},{"id":148,"name":"Montenegro","iso_code":"ME"},{"id":149,"name":"Montserrat","iso_code":"MS"},{"id":150,"name":"Morocco","iso_code":"MA"},{"id":151,"name":"Mozambique","iso_code":"MZ"},{"id":152,"name":"Myanmar","iso_code":"MM"},{"id":153,"name":"Namibia","iso_code":"NA"},{"id":154,"name":"Nauru","iso_code":"NR"},{"id":155,"name":"Nepal","iso_code":"NP"},{"id":156,"name":"Netherlands (the)","iso_code":"NL"},{"id":157,"name":"New Caledonia","iso_code":"NC"},{"id":158,"name":"New Zealand","iso_code":"NZ"},{"id":159,"name":"Nicaragua","iso_code":"NI"},{"id":160,"name":"Niger (the)","iso_code":"NE"},{"id":161,"name":"Nigeria","iso_code":"NG"},{"id":162,"name":"Niue","iso_code":"NU"},{"id":163,"name":"Norfolk Island","iso_code":"NF"},{"id":164,"name":"North 
Macedonia","iso_code":"MK"},{"id":165,"name":"Northern Mariana Islands (the)","iso_code":"MP"},{"id":166,"name":"Norway","iso_code":"NO"},{"id":167,"name":"Oman","iso_code":"OM"},{"id":168,"name":"Pakistan","iso_code":"PK"},{"id":169,"name":"Palau","iso_code":"PW"},{"id":170,"name":"Palestine, State of","iso_code":"PS"},{"id":171,"name":"Panama","iso_code":"PA"},{"id":172,"name":"Papua New Guinea","iso_code":"PG"},{"id":173,"name":"Paraguay","iso_code":"PY"},{"id":174,"name":"Peru","iso_code":"PE"},{"id":175,"name":"Philippines (the)","iso_code":"PH"},{"id":176,"name":"Pitcairn","iso_code":"PN"},{"id":177,"name":"Poland","iso_code":"PL"},{"id":178,"name":"Portugal","iso_code":"PT"},{"id":179,"name":"Puerto Rico","iso_code":"PR"},{"id":180,"name":"Qatar","iso_code":"QA"},{"id":184,"name":"Réunion","iso_code":"RE"},{"id":181,"name":"Romania","iso_code":"RO"},{"id":182,"name":"Russian Federation (the)","iso_code":"RU"},{"id":183,"name":"Rwanda","iso_code":"RW"},{"id":185,"name":"Saint Barthélemy","iso_code":"BL"},{"id":186,"name":"Saint Helena, Ascension and Tristan da Cunha","iso_code":"SH"},{"id":187,"name":"Saint Kitts and Nevis","iso_code":"KN"},{"id":188,"name":"Saint Lucia","iso_code":"LC"},{"id":189,"name":"Saint Martin (French part)","iso_code":"MF"},{"id":190,"name":"Saint Pierre and Miquelon","iso_code":"PM"},{"id":191,"name":"Saint Vincent and the Grenadines","iso_code":"VC"},{"id":192,"name":"Samoa","iso_code":"WS"},{"id":193,"name":"San Marino","iso_code":"SM"},{"id":194,"name":"Sao Tome and Principe","iso_code":"ST"},{"id":195,"name":"Saudi Arabia","iso_code":"SA"},{"id":196,"name":"Senegal","iso_code":"SN"},{"id":197,"name":"Serbia","iso_code":"RS"},{"id":198,"name":"Seychelles","iso_code":"SC"},{"id":199,"name":"Sierra Leone","iso_code":"SL"},{"id":1000000,"name":"Sildavie2","iso_code":"II"},{"id":200,"name":"Singapore","iso_code":"SG"},{"id":201,"name":"Sint Maarten (Dutch 
part)","iso_code":"SX"},{"id":202,"name":"Slovakia","iso_code":"SK"},{"id":203,"name":"Slovenia","iso_code":"SI"},{"id":204,"name":"Solomon Islands","iso_code":"SB"},{"id":205,"name":"Somalia","iso_code":"SO"},{"id":206,"name":"South Africa","iso_code":"ZA"},{"id":207,"name":"South Georgia and the South Sandwich Islands","iso_code":"GS"},{"id":208,"name":"South Sudan","iso_code":"SS"},{"id":209,"name":"Spain","iso_code":"ES"},{"id":210,"name":"Sri Lanka","iso_code":"LK"},{"id":211,"name":"Sudan (the)","iso_code":"SD"},{"id":212,"name":"Suriname","iso_code":"SR"},{"id":213,"name":"Svalbard and Jan Mayen","iso_code":"SJ"},{"id":214,"name":"Sweden","iso_code":"SE"},{"id":215,"name":"Switzerland","iso_code":"CH"},{"id":216,"name":"Syrian Arab Republic (the)","iso_code":"SY"},{"id":217,"name":"Taiwan (Province of China)","iso_code":"TW"},{"id":218,"name":"Tajikistan","iso_code":"TJ"},{"id":219,"name":"Tanzania, the United Republic of","iso_code":"TZ"},{"id":220,"name":"Thailand","iso_code":"TH"},{"id":221,"name":"Timor-Leste","iso_code":"TL"},{"id":222,"name":"Togo","iso_code":"TG"},{"id":223,"name":"Tokelau","iso_code":"TK"},{"id":224,"name":"Tonga","iso_code":"TO"},{"id":225,"name":"Trinidad and Tobago","iso_code":"TT"},{"id":226,"name":"Tunisia","iso_code":"TN"},{"id":227,"name":"Turkey","iso_code":"TR"},{"id":228,"name":"Turkmenistan","iso_code":"TM"},{"id":229,"name":"Turks and Caicos Islands (the)","iso_code":"TC"},{"id":230,"name":"Tuvalu","iso_code":"TV"},{"id":231,"name":"Uganda","iso_code":"UG"},{"id":232,"name":"Ukraine","iso_code":"UA"},{"id":233,"name":"United Arab Emirates (the)","iso_code":"AE"},{"id":234,"name":"United Kingdom of Great Britain and Northern Ireland (the)","iso_code":"GB"},{"id":235,"name":"United States Minor Outlying Islands (the)","iso_code":"UM"},{"id":236,"name":"United States of America 
(the)","iso_code":"US"},{"id":999999,"name":"UNKNOWN","iso_code":"__"},{"id":237,"name":"Uruguay","iso_code":"UY"},{"id":238,"name":"Uzbekistan","iso_code":"UZ"},{"id":239,"name":"Vanuatu","iso_code":"VU"},{"id":240,"name":"Venezuela (Bolivarian Republic of)","iso_code":"VE"},{"id":241,"name":"Viet Nam","iso_code":"VN"},{"id":242,"name":"Virgin Islands (British)","iso_code":"VG"},{"id":243,"name":"Virgin Islands (U.S.)","iso_code":"VI"},{"id":244,"name":"Wallis and Futuna","iso_code":"WF"},{"id":245,"name":"Western Sahara*","iso_code":"EH"},{"id":246,"name":"Yemen","iso_code":"YE"},{"id":247,"name":"Zambia","iso_code":"ZM"},{"id":248,"name":"Zimbabwe","iso_code":"ZW"}] - - - -```python -# test avec authentification -url = 'https://oacct-test.epfl.ch/api/country/3' -r2 = requests.get(url, auth=(oacct_login, oacct_pwd)) -print(r2) -``` - - - - - -```python -print(r2.text) -``` - - {"id":3,"name":"Algeria","iso_code":"DZ"} - - - -```python -journal = { - "id": 1, - "name": "Revue médicale suisse", - "name_short_iso_4": "Rev. méd. 
suisse", - "starting_year": "2005", - "end_year": "9999", - "website": "", - "country": 215.0, - "language": "138", - "publisher": "1", - "doaj_seal": 0, - "doaj_status": 0, - "lockss": 0, - "portico": 0, - "nlch": 0, - "qoam_av_score": "", - "oa_status": 1, - "issn": "1234-5678" - } -``` - - -```python -# test avec post -url = 'https://oacct-test.epfl.ch/api/journal/' -r2 = requests.post(url, auth=(oacct_login, oacct_pwd), headers=headers, data=journal) -print(r2) -``` - - - - - -```python -print(r2.text) -``` - - {"issn":["This field is required."]} - - - -```python -country = { - "name": "Sildavie", - "iso_code": "II", - "id": 333 - } -``` - - -```python -# test avec post -url = 'https://oacct-test.epfl.ch/api/country/' -r2 = requests.post(url, auth=(oacct_login, oacct_pwd), headers=headers, data=country) -print(r2) -``` - - - - - -```python -print(r2.json()) -``` - - {'id': 1000001, 'name': 'Sildavie', 'iso_code': 'II'} - - - -```python -country2 = { - "id": 1000000, - "name": "Sildavie3", - "iso_code": "II" -} -``` - - -```python -# test avec put -url = 'https://oacct-test.epfl.ch/api/country/1000000' -r2 = requests.put(url, auth=(oacct_login, oacct_pwd), headers=headers, data=country2) -print(r2) -``` - - - - - -```python -print(r2.json()) -``` - - {'id': 1000000, 'name': 'Sildavie2', 'iso_code': 'II'} - - - -```python -# convert to json -json_response = r2.json() -print(json_response) -``` - - {'id': 1000000, 'name': 'Sildavie2', 'iso_code': 'II'} - - - -```python -# get the name -name = json_response['name'] -name -``` - - - - - 'Sildavie2' - - diff --git a/import_scripts/99_oacct_import.py b/import_scripts/99_oacct_import.py deleted file mode 100644 index 8e2588c1..00000000 --- a/import_scripts/99_oacct_import.py +++ /dev/null @@ -1,191 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -# # Projet Open Access Compliance Check Tool (OACCT) -# -# Projet P5 de la bibliothèque de l'EPFL en collaboration avec les bibliothèques des Universités de Genève, 
Lausanne et Berne : https://www.swissuniversities.ch/themen/digitalisierung/p-5-wissenschaftliche-information/projekte/swiss-mooc-service-1-1-1-1 -# -# Ce notebook permet d'importer les données en utilisant l'API : -# -# https://oacct-test.epfl.ch/api/ -# -# Exemple avec Journals : -# -# https://oacct-test.epfl.ch/api/journal/ -# -# GET /api/journal/ -# -# HTTP 200 OK -# Allow: GET, POST, HEAD, OPTIONS -# Content-Type: application/json -# Vary: Accept -# -# [] -# -# Media type: application/json -# -# Content: -# ``` json -# { -# "issn": [], -# "name": "", -# "name_short_iso_4": "", -# "website": "", -# "oa_options": "", -# "starting_year": null, -# "end_year": null, -# "doaj_seal": false, -# "doaj_status": false, -# "lockss": false, -# "nlch": false, -# "portico": false, -# "qoam_av_score": null -# } -# ``` -# - -# In[1]: - - -import json -import requests -import codecs -oacct_login = 'oacct_test' -oacct_pwd = '2f4dBRhyj7' -headers = {'accept': 'application/json'} - - -# In[2]: - - -# test sans authentifications -url = 'https://oacct-test.epfl.ch/api/country/' -r = requests.get(url) -print(r) - - -# In[3]: - - -print(r.text) - - -# In[6]: - - -# test avec authentification -url = 'https://oacct-test.epfl.ch/api/country/3' -r2 = requests.get(url, auth=(oacct_login, oacct_pwd)) -print(r2) - - -# In[7]: - - -print(r2.text) - - -# In[9]: - - -journal = { - "id": 1, - "name": "Revue médicale suisse", - "name_short_iso_4": "Rev. méd. 
suisse", - "starting_year": "2005", - "end_year": "9999", - "website": "", - "country": 215.0, - "language": "138", - "publisher": "1", - "doaj_seal": 0, - "doaj_status": 0, - "lockss": 0, - "portico": 0, - "nlch": 0, - "qoam_av_score": "", - "oa_status": 1, - "issn": "1234-5678" - } - - -# In[11]: - - -# test avec post -url = 'https://oacct-test.epfl.ch/api/journal/' -r2 = requests.post(url, auth=(oacct_login, oacct_pwd), headers=headers, data=journal) -print(r2) - - -# In[12]: - - -print(r2.text) - - -# In[13]: - - -country = { - "name": "Sildavie", - "iso_code": "II", - "id": 333 - } - - -# In[14]: - - -# test avec post -url = 'https://oacct-test.epfl.ch/api/country/' -r2 = requests.post(url, auth=(oacct_login, oacct_pwd), headers=headers, data=country) -print(r2) - - -# In[15]: - - -print(r2.json()) - - -# In[16]: - - -country2 = { - "id": 1000000, - "name": "Sildavie3", - "iso_code": "II" -} - - -# In[17]: - - -# test avec put -url = 'https://oacct-test.epfl.ch/api/country/1000000' -r2 = requests.put(url, auth=(oacct_login, oacct_pwd), headers=headers, data=country2) -print(r2) - - -# In[18]: - - -print(r2.json()) - - -# In[19]: - - -# convert to json -json_response = r2.json() -print(json_response) - - -# In[20]: - - -# get the name -name = json_response['name'] -name - diff --git a/import_scripts/README.md b/import_scripts/README.md deleted file mode 100644 index 041865ab..00000000 --- a/import_scripts/README.md +++ /dev/null @@ -1,9 +0,0 @@ -Original IPython notebooks converted to pure Python scripts and Markdown documents: - -``` -ipython nbconvert --to script *.ipynb -ipython nbconvert --to markdown *.ipynb -``` -=> easier Git version control - -Snapshot on 2021-09-23 AB diff --git a/sphinx/django_api.rst b/sphinx/django_api.rst index c4c26c85..13445081 100644 --- a/sphinx/django_api.rst +++ b/sphinx/django_api.rst @@ -1,79 +1,79 @@ django\_api package =================== -The django\_api package implements the admin backend and web service components of 
the OACCT application. +The django\_api package implements the admin backend and web service components of the OACT application. It uses the Django REST framework https://www.django-rest-framework.org/ and the standard Django admin site. Subpackages ----------- .. toctree:: :maxdepth: 4 Submodules ---------- django\_api.admin module ------------------------ .. automodule:: django_api.admin :members: :undoc-members: :show-inheritance: django\_api.apps module ----------------------- .. automodule:: django_api.apps :members: :undoc-members: :show-inheritance: django\_api.models module ------------------------- .. automodule:: django_api.models :members: :undoc-members: :show-inheritance: django\_api.serializers module ------------------------------ .. automodule:: django_api.serializers :members: :undoc-members: :show-inheritance: django\_api.tests module ------------------------ .. automodule:: django_api.tests :members: :undoc-members: :show-inheritance: django\_api.urls module ----------------------- .. automodule:: django_api.urls :members: :undoc-members: :show-inheritance: django\_api.views module ------------------------ .. automodule:: django_api.views :members: :undoc-members: :show-inheritance: Module contents --------------- .. automodule:: django_api :members: :undoc-members: :show-inheritance: diff --git a/sphinx/index.rst b/sphinx/index.rst index 482678e2..7dc1cd0e 100644 --- a/sphinx/index.rst +++ b/sphinx/index.rst @@ -1,29 +1,29 @@ -.. OACCT documentation master file, created by +.. OACT documentation master file, created by sphinx-quickstart on Mon Sep 6 14:29:45 2021. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Welcome to OACCT's documentation! +Welcome to OACT's documentation! ================================= .. 
toctree:: :maxdepth: 2 :caption: Contents: Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` * :doc:`sphinx_howto` Contents -------- .. toctree:: modules sphinx_howto diff --git a/sphinx/modules.rst b/sphinx/modules.rst index 8fca2f03..e73be6f6 100644 --- a/sphinx/modules.rst +++ b/sphinx/modules.rst @@ -1,9 +1,9 @@ -open-access-compliance-check-tool-oacct +open-access-check-tool-oact ======================================= .. toctree:: :maxdepth: 4 django_api django_app manage diff --git a/static/assets/by-nc-sa.png b/static/assets/by-nc-sa.png new file mode 100644 index 00000000..b9a55533 Binary files /dev/null and b/static/assets/by-nc-sa.png differ diff --git a/styleguide.config.js b/styleguide.config.js index 173e4ac3..1fe9bb61 100644 --- a/styleguide.config.js +++ b/styleguide.config.js @@ -1,89 +1,89 @@ // const path = require('path'); module.exports = { - title:"OACCT Documentation", - version:"0.0.1", + title:"OACT Documentation", + version:"1.0", theme: { color: { link: '#3771C8', linkHover: '#D40000' }, fontFamily: { - base: '"Helvetica Neue", cursive' + base: 'Helvetica, Arial, sans-serif' } }, styleguideDir:"reactDoc/styleguide", // styles: { // Logo: { // // We're changing the LogoRenderer component // logo: { // // We're changing the rsg--logo-XX class name inside the component // // animation: '$blink ease-in-out 300ms infinite' // // }, // // '@keyframes blink': { // // to: { opacity: 0 } // // } // } // }, sections: [ { name: 'Introduction', content: './assets/docs/introduction.md' }, { name: 'Documentation', sections: [ { name: 'Installation', content: './assets/docs/installation.md', description: 'The description for the installation section' }, { name: '[Django]Backend Configuration', description: 'The description for the backend section using Django', content: './assets/docs/backend_configuration.md' }, { name: '[React]Frontend Configuration', description: 'The description for the ins Frontend section 
using React.js', content: './assets/docs/frontend_configuration.md' }, { name: 'Live Demo', external: true, href: 'https://oacct-test.epfl.ch/#/' } ] }, { name: 'React UI Components', // content: 'docs/ui.md', components: ['./assets/src/pages/**/*.js','./assets/src/components/**/*.js'], exampleMode: 'expand', // 'hide' | 'collapse' | 'expand' usageMode: 'expand' // 'hide' | 'collapse' | 'expand' }, { name: 'React context', // content: 'docs/ui.md', components: './assets/src/ContextProvider.js', content: './assets/src/ContextProvider.md', exampleMode: 'expand', // 'hide' | 'collapse' | 'expand' usageMode: 'expand' // 'hide' | 'collapse' | 'expand' }, { name: 'API', content: './assets/src/services/api.md', exampleMode: 'expand', // 'hide' | 'collapse' | 'expand' usageMode: 'expand', // 'hide' | 'collapse' | 'expand' sections: [ { name: 'Requests', content: './assets/src/services/requests/requests.md', description: 'What are the requests syntax?' }, ] } ] -} \ No newline at end of file +} diff --git a/templates/admin/are_you_sure.html b/templates/admin/are_you_sure.html new file mode 100644 index 00000000..0eb9e112 --- /dev/null +++ b/templates/admin/are_you_sure.html @@ -0,0 +1,19 @@ +{% extends "admin/base_site.html" %} + +{% block content %} + +
+ {% csrf_token %} +

+ {{ text }} +

+ + {% for obj in queryset %} + + {% endfor %} + + + + +
+{% endblock %} \ No newline at end of file diff --git a/templates/assets/index.html b/templates/assets/index.html index 923ff045..dff7dc9a 100644 --- a/templates/assets/index.html +++ b/templates/assets/index.html @@ -1,39 +1,39 @@ {% load static %} - OACCT | Test Version + OACT