|
What this is
Other links
The source code// $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/tests/utilTests/HTMLLinkProcessorTest.java,v 1.2 2004/02/11 02:16:57 woolfel Exp $ /* * ==================================================================== * Copyright 2002-2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ // The developers of JMeter and Apache are grateful to the developers // of HTMLParser for giving Apache Software Foundation a non-exclusive // license. The performance benefits of HTMLParser are clear and the // users of JMeter will benefit from the hard work of the HTMLParser // team. For detailed information about HTMLParser, the project is // hosted on sourceforge at http://htmlparser.sourceforge.net/. // // HTMLParser was originally created by Somik Raha in 2000. Since then // a healthy community of users has formed and helped refine the // design so that it is able to tackle the difficult task of parsing // dirty HTML. Derrick Oswald is the current lead developer and was kind // enough to assist JMeter. 
package org.htmlparser.tests.utilTests; import org.htmlparser.tags.LinkTag; import org.htmlparser.tests.ParserTestCase; import org.htmlparser.util.LinkProcessor; import org.htmlparser.util.ParserException; public class HTMLLinkProcessorTest extends ParserTestCase { private LinkProcessor lp; public HTMLLinkProcessorTest(String name) { super(name); } protected void setUp() { lp = new LinkProcessor(); } public void testIsURL() { String resourceLoc1 = "http://someurl.com"; String resourceLoc2 = "myfilehttp.dat"; assertTrue( resourceLoc1 + " should be a url", LinkProcessor.isURL(resourceLoc1)); assertTrue( resourceLoc2 + " should not be a url", !LinkProcessor.isURL(resourceLoc2)); String resourceLoc3 = "file://localhost/D:/java/jdk1.3/docs/api/overview-summary.html"; assertTrue( resourceLoc3 + " should be a url", LinkProcessor.isURL(resourceLoc3)); } public void testFixSpaces() { String url = "http://htmlparser.sourceforge.net/test/This is a Test Page.html"; String fixedURL = LinkProcessor.fixSpaces(url); int index = fixedURL.indexOf(" "); assertEquals( "Expected", "http://htmlparser.sourceforge.net/test/This%20is%20a%20Test%20Page.html", fixedURL); } /** * Reproduction of bug 673379 reported by Joe Robbins. Parser goes into * infinte loop if the link has no slashes. */ public void testLinkWithNoSlashes() throws Exception { createParser("Foo", "http://www.oygevalt.com"); parser.registerScanners(); parseAndAssertNodeCount(1); assertTrue(node[0] instanceof LinkTag); LinkTag linkTag = (LinkTag) node[0]; assertStringEquals( "link", "http://www.oygevalt.com/foo.txt", linkTag.getLink()); assertEquals("link", "Foo", linkTag.getLinkText()); } // // Tests from Appendix C Examples of Resolving Relative URI References // RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax // T. Berners-Lee et al. 
// http://www.ietf.org/rfc/rfc2396.txt // Within an object with a well-defined base URI of static final String baseURI = "http://a/b/c/d;p?q"; // the relative URI would be resolved as follows: // C.1. Normal Examples // g:h = g:h // g = http://a/b/c/g // ./g = http://a/b/c/g // g/ = http://a/b/c/g/ // /g = http://a/g // //g = http://g // ?y = http://a/b/c/?y // g?y = http://a/b/c/g?y // #s = (current document)#s // g#s = http://a/b/c/g#s // g?y#s = http://a/b/c/g?y#s // ;x = http://a/b/c/;x // g;x = http://a/b/c/g;x // g;x?y#s = http://a/b/c/g;x?y#s // . = http://a/b/c/ // ./ = http://a/b/c/ // .. = http://a/b/ // ../ = http://a/b/ // ../g = http://a/b/g // ../.. = http://a/ // ../../ = http://a/ // ../../g = http://a/g public void test1() throws ParserException { assertEquals( "test1 failed", "https:h", (new LinkProcessor()).extract("https:h", baseURI)); } public void test2() throws ParserException { assertEquals( "test2 failed", "http://a/b/c/g", (new LinkProcessor()).extract("g", baseURI)); } public void test3() throws ParserException { assertEquals( "test3 failed", "http://a/b/c/g", (new LinkProcessor()).extract("./g", baseURI)); } public void test4() throws ParserException { assertEquals( "test4 failed", "http://a/b/c/g/", (new LinkProcessor()).extract("g/", baseURI)); } public void test5() throws ParserException { assertEquals( "test5 failed", "http://a/g", (new LinkProcessor()).extract("/g", baseURI)); } public void test6() throws ParserException { assertEquals( "test6 failed", "http://g", (new LinkProcessor()).extract("//g", baseURI)); } public void test7() throws ParserException { assertEquals( "test7 failed", "http://a/b/c/?y", (new LinkProcessor()).extract("?y", baseURI)); } public void test8() throws ParserException { assertEquals( "test8 failed", "http://a/b/c/g?y", (new LinkProcessor()).extract("g?y", baseURI)); } public void test9() throws ParserException { assertEquals( "test9 failed", "https:h", (new LinkProcessor()).extract("https:h", baseURI)); } 
public void test10() throws ParserException { assertEquals( "test10 failed", "https:h", (new LinkProcessor()).extract("https:h", baseURI)); } // #s = (current document)#s public void test11() throws ParserException { assertEquals( "test11 failed", "http://a/b/c/g#s", (new LinkProcessor()).extract("g#s", baseURI)); } public void test12() throws ParserException { assertEquals( "test12 failed", "http://a/b/c/g?y#s", (new LinkProcessor()).extract("g?y#s", baseURI)); } public void test13() throws ParserException { assertEquals( "test13 failed", "http://a/b/c/;x", (new LinkProcessor()).extract(";x", baseURI)); } public void test14() throws ParserException { assertEquals( "test14 failed", "http://a/b/c/g;x", (new LinkProcessor()).extract("g;x", baseURI)); } public void test15() throws ParserException { assertEquals( "test15 failed", "http://a/b/c/g;x?y#s", (new LinkProcessor()).extract("g;x?y#s", baseURI)); } public void test16() throws ParserException { assertEquals( "test16 failed", "http://a/b/c/", (new LinkProcessor()).extract(".", baseURI)); } public void test17() throws ParserException { assertEquals( "test17 failed", "http://a/b/c/", (new LinkProcessor()).extract("./", baseURI)); } public void test18() throws ParserException { assertEquals( "test18 failed", "http://a/b/", (new LinkProcessor()).extract("..", baseURI)); } public void test19() throws ParserException { assertEquals( "test19 failed", "http://a/b/", (new LinkProcessor()).extract("../", baseURI)); } public void test20() throws ParserException { assertEquals( "test20 failed", "http://a/b/g", (new LinkProcessor()).extract("../g", baseURI)); } public void test21() throws ParserException { assertEquals( "test21 failed", "http://a/", (new LinkProcessor()).extract("../..", baseURI)); } public void test22() throws ParserException { assertEquals( "test22 failed", "http://a/g", (new LinkProcessor()).extract("../../g", baseURI)); } // C.2. 
Abnormal Examples // Although the following abnormal examples are unlikely to occur in // normal practice, all URI parsers should be capable of resolving them // consistently. Each example uses the same base as above. // // An empty reference refers to the start of the current document. // // <> = (current document) // // Parsers must be careful in handling the case where there are more // relative path ".." segments than there are hierarchical levels in the // base URI's path. Note that the ".." syntax cannot be used to change // the authority component of a URI. // // ../../../g = http://a/../g // ../../../../g = http://a/../../g // // In practice, some implementations strip leading relative symbolic // elements (".", "..") after applying a relative URI calculation, based // on the theory that compensating for obvious author errors is better // than allowing the request to fail. Thus, the above two references // will be interpreted as "http://a/g" by some implementations. // // Similarly, parsers must avoid treating "." and ".." as special when // they are not complete components of a relative path. // // /./g = http://a/./g // /../g = http://a/../g // g. = http://a/b/c/g. // .g = http://a/b/c/.g // g.. = http://a/b/c/g.. // ..g = http://a/b/c/..g // // Less likely are cases where the relative URI uses unnecessary or // nonsensical forms of the "." and ".." complete path segments. // // ./../g = http://a/b/g // ./g/. = http://a/b/c/g/ // g/./h = http://a/b/c/g/h // g/../h = http://a/b/c/h // g;x=1/./y = http://a/b/c/g;x=1/y // g;x=1/../y = http://a/b/c/y // // All client applications remove the query component from the base URI // before resolving relative URI. However, some applications fail to // separate the reference's query and/or fragment components from a // relative path before merging it with the base path. 
This error is // rarely noticed, since typical usage of a fragment never includes the // hierarchy ("/") character, and the query component is not normally // used within relative references. // // g?y/./x = http://a/b/c/g?y/./x // g?y/../x = http://a/b/c/g?y/../x // g#s/./x = http://a/b/c/g#s/./x // g#s/../x = http://a/b/c/g#s/../x // // Some parsers allow the scheme name to be present in a relative URI if // it is the same as the base URI scheme. This is considered to be a // loophole in prior specifications of partial URI [RFC1630]. Its use // should be avoided. // // http:g = http:g ; for validating parsers // | http://a/b/c/g ; for backwards compatibility // public void test23 () throws HTMLParserException // { // assertEquals ("test23 failed", "http://a/../g", (new HTMLLinkProcessor ()).extract ("../../../g", baseURI)); // } // public void test24 () throws HTMLParserException // { // assertEquals ("test24 failed", "http://a/../../g", (new HTMLLinkProcessor ()).extract ("../../../../g", baseURI)); // } public void test23() throws ParserException { assertEquals( "test23 failed", "http://a/g", (new LinkProcessor()).extract("../../../g", baseURI)); } public void test24() throws ParserException { assertEquals( "test24 failed", "http://a/g", (new LinkProcessor()).extract("../../../../g", baseURI)); } public void test25() throws ParserException { assertEquals( "test25 failed", "http://a/./g", (new LinkProcessor()).extract("/./g", baseURI)); } public void test26() throws ParserException { assertEquals( "test26 failed", "http://a/../g", (new LinkProcessor()).extract("/../g", baseURI)); } public void test27() throws ParserException { assertEquals( "test27 failed", "http://a/b/c/g.", (new LinkProcessor()).extract("g.", baseURI)); } public void test28() throws ParserException { assertEquals( "test28 failed", "http://a/b/c/.g", (new LinkProcessor()).extract(".g", baseURI)); } public void test29() throws ParserException { assertEquals( "test29 failed", "http://a/b/c/g..", 
(new LinkProcessor()).extract("g..", baseURI)); } public void test30() throws ParserException { assertEquals( "test30 failed", "http://a/b/c/..g", (new LinkProcessor()).extract("..g", baseURI)); } public void test31() throws ParserException { assertEquals( "test31 failed", "http://a/b/g", (new LinkProcessor()).extract("./../g", baseURI)); } public void test32() throws ParserException { assertEquals( "test32 failed", "http://a/b/c/g/", (new LinkProcessor()).extract("./g/.", baseURI)); } public void test33() throws ParserException { assertEquals( "test33 failed", "http://a/b/c/g/h", (new LinkProcessor()).extract("g/./h", baseURI)); } public void test34() throws ParserException { assertEquals( "test34 failed", "http://a/b/c/h", (new LinkProcessor()).extract("g/../h", baseURI)); } public void test35() throws ParserException { assertEquals( "test35 failed", "http://a/b/c/g;x=1/y", (new LinkProcessor()).extract("g;x=1/./y", baseURI)); } public void test36() throws ParserException { assertEquals( "test36 failed", "http://a/b/c/y", (new LinkProcessor()).extract("g;x=1/../y", baseURI)); } public void test37() throws ParserException { assertEquals( "test37 failed", "http://a/b/c/g?y/./x", (new LinkProcessor()).extract("g?y/./x", baseURI)); } public void test38() throws ParserException { assertEquals( "test38 failed", "http://a/b/c/g?y/../x", (new LinkProcessor()).extract("g?y/../x", baseURI)); } public void test39() throws ParserException { assertEquals( "test39 failed", "http://a/b/c/g#s/./x", (new LinkProcessor()).extract("g#s/./x", baseURI)); } public void test40() throws ParserException { assertEquals( "test40 failed", "http://a/b/c/g#s/../x", (new LinkProcessor()).extract("g#s/../x", baseURI)); } // public void test41 () throws HTMLParserException // { // assertEquals ("test41 failed", "http:g", (new HTMLLinkProcessor ()).extract ("http:g", baseURI)); // } public void test41() throws ParserException { assertEquals( "test41 failed", "http://a/b/c/g", (new 
LinkProcessor()).extract("http:g", baseURI)); } } |
... this post is sponsored by my books ... | |
#1 New Release! |
FP Best Seller |
Copyright 1998-2021 Alvin Alexander, alvinalexander.com
All Rights Reserved.
A percentage of advertising revenue from
pages under the /java/jwarehouse
URI on this website is
paid back to open source projects.