<a name="L6"></a><tt class="py-lineno"> 6</tt><tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># This program is free software; you can redistribute it and/or</tt></tt>
<a name="L7"></a><tt class="py-lineno"> 7</tt><tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># modify it under the terms of the GNU General Public License</tt></tt>
<a name="L8"></a><tt class="py-lineno"> 8</tt><tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># as published by the Free Software Foundation; either version 2 </tt></tt>
<a name="L9"></a><tt class="py-lineno"> 9</tt><tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># of the License, or (at your option) any later version.</tt></tt>
<a name="L11"></a><tt class="py-lineno"> 11</tt><tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># This program is distributed in the hope that it will be useful,</tt></tt>
<a name="L12"></a><tt class="py-lineno"> 12</tt><tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># but WITHOUT ANY WARRANTY; without even the implied warranty of</tt></tt>
<a name="L13"></a><tt class="py-lineno"> 13</tt><tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the</tt></tt>
<a name="L14"></a><tt class="py-lineno"> 14</tt><tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># GNU General Public License for more details. </tt></tt>
<a name="L16"></a><tt class="py-lineno"> 16</tt><tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># You should have received a copy of the GNU General Public License</tt></tt>
<a name="L17"></a><tt class="py-lineno"> 17</tt><tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># along with this program; if not, write to the Free Software</tt></tt>
<a name="L18"></a><tt class="py-lineno"> 18</tt><tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.</tt></tt>
<a name="L25"></a><tt class="py-lineno"> 25</tt><tt class="py-line"><tt class="py-comment"># Citeseer provides two ways to search for documents: its own search</tt></tt>
<a name="L26"></a><tt class="py-lineno"> 26</tt><tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># engine, and Google. This code uses the first solution (as Google's</tt></tt>
<a name="L27"></a><tt class="py-lineno"> 27</tt><tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># search API is not what it used to be...): first, all the links to</tt></tt>
<a name="L28"></a><tt class="py-lineno"> 28</tt><tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># all the detailed citation pages are collected, then each page is</tt></tt>
<a name="L29"></a><tt class="py-lineno"> 29</tt><tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># parsed, and the bibtex and abstract are extracted.</tt></tt>
</div><div id="ResultScraper-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="ResultScraper-expanded"><a name="L51"></a><tt class="py-lineno"> 51</tt><tt class="py-line"><tt class="py-docstring">"""Parse a Citeseer result page containing links to the actual</tt></tt>
</div><div id="ResultScraper.count-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="ResultScraper.count-expanded"><a name="L65"></a><tt class="py-lineno"> 65</tt><tt class="py-line"><tt class="py-docstring">"""Return the overall result count."""</tt></tt>
<a name="L66"></a><tt class="py-lineno"> 66</tt><tt class="py-line"><tt class="py-comment"># the result count is immediately before the list of results,</tt></tt>
<a name="L67"></a><tt class="py-lineno"> 67</tt><tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># unless we see no RIS comments, in which case there is no</tt></tt>
<a name="L68"></a><tt class="py-lineno"> 68</tt><tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># result at all.</tt></tt>
</div><div id="ResultScraper.links-collapsed" style="display:none;" pad="+++" indent="++++++++"></div><div id="ResultScraper.links-expanded"><a name="L81"></a><tt class="py-lineno"> 81</tt><tt class="py-line"><tt class="py-docstring">"""Return the result links."""</tt></tt>
</div><div id="CitationScraper-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="CitationScraper-expanded"><a name="L94"></a><tt class="py-lineno"> 94</tt><tt class="py-line"><tt class="py-docstring">"""Parse a detailed citation page, containing an abstract and a</tt></tt>
</div><div id="Citeseer-collapsed" style="display:none;" pad="+++" indent="++++"></div><div id="Citeseer-expanded"><a name="L110"></a><tt class="py-lineno">110</tt><tt class="py-line"><tt class="py-docstring">"""A connection to Citeseer."""</tt></tt>
<a name="L115"></a><tt class="py-lineno">115</tt><tt class="py-line"><tt id="link-39" class="py-name" targets="Variable Pyblio.External.Citeseer.Citeseer.FETCHER_POOL=Pyblio.External.Citeseer.Citeseer-class.html#FETCHER_POOL"><a title="Pyblio.External.Citeseer.Citeseer.FETCHER_POOL" class="py-name" href="#" onclick="return doclink('link-39', 'FETCHER_POOL', 'link-39');">FETCHER_POOL</a></tt><tt class="py-op">=</tt><tt class="py-number">2</tt><tt class="py-comment"># how many detailed pages to fetch at a time</tt></tt>
Pyblio.Stores.bsddbstore.log"class="py-name"href="#"onclick="return doclink('link-76', 'log', 'link-17');">log</a></tt><ttclass="py-op">.</tt><ttclass="py-name">warn</tt><ttclass="py-op">(</tt><ttclass="py-string">'page has no bibtex field?'</tt><ttclass="py-op">)</tt></tt>
<a name="L226"></a><tt class="py-lineno">226</tt><tt class="py-line"><tt class="py-comment"># we are done once there is no pending link to fetch</tt></tt>
<a name="L227"></a><tt class="py-lineno">227</tt><tt class="py-line"><tt class="py-comment"></tt><tt class="py-comment"># and all the running fetchers have returned.</tt></tt>
<a name="L234"></a><tt class="py-lineno">234</tt><tt class="py-line"><tt class="py-docstring">"""Handle a result page."""</tt></tt>
<a name="L235"></a><tt class="py-lineno">235</tt><tt class="py-line"><tt class="py-comment"># initial pass, collect all the results, up to maxhits</tt></tt>
Pyblio.Stores.bsddbstore.log"class="py-name"href="#"onclick="return doclink('link-99', 'log', 'link-17');">log</a></tt><ttclass="py-op">.</tt><ttclass="py-name">info</tt><ttclass="py-op">(</tt><ttclass="py-string">'%d results for the query'</tt><ttclass="py-op">%</tt><ttclass="py-name">self</tt><ttclass="py-op">.</tt><ttclass="py-name">_total</tt><ttclass="py-op">)</tt></tt>
Pyblio.Stores.bsddbstore.log"class="py-name"href="#"onclick="return doclink('link-102', 'log', 'link-17');">log</a></tt><ttclass="py-op">.</tt><ttclass="py-name">warn</tt><ttclass="py-op">(</tt><ttclass="py-string">'this batch did not provide new links, stopping'</tt><ttclass="py-op">)</tt></tt>
Pyblio.Stores.bsddbstore.log"class="py-name"href="#"onclick="return doclink('link-104', 'log', 'link-17');">log</a></tt><ttclass="py-op">.</tt><ttclass="py-name">info</tt><ttclass="py-op">(</tt><ttclass="py-string">'%d links in this batch (%s/%d)'</tt><ttclass="py-op">%</tt><ttclass="py-op">(</tt></tt>