aoc-2022/venv/Lib/site-packages/bs4/tests/test_soup.py

# -*- coding: utf-8 -*-
"""Tests of Beautiful Soup as a whole."""

from pdb import set_trace
import logging
import os
import pickle
import pytest
import sys
import tempfile

from bs4 import (
    BeautifulSoup,
    BeautifulStoneSoup,
    GuessedAtParserWarning,
    MarkupResemblesLocatorWarning,
    dammit,
)
from bs4.builder import (
    builder_registry,
    TreeBuilder,
    ParserRejectedMarkup,
)
from bs4.element import (
    Comment,
    SoupStrainer,
    Tag,
    NavigableString,
)

from . import (
    default_builder,
    SoupTest,
    skipIf,
)
import warnings

try:
    from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
    LXML_PRESENT = True
except ImportError as e:
    LXML_PRESENT = False
    
PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))

class TestConstructor(SoupTest):

    def test_short_unicode_input(self):
        data = "<h1>éé</h1>"
        soup = self.soup(data)
        assert "éé" == soup.h1.string

    def test_embedded_null(self):
        data = "<h1>foo\0bar</h1>"
        soup = self.soup(data)
        assert "foo\0bar" == soup.h1.string

    def test_exclude_encodings(self):
        utf8_data = "Räksmörgås".encode("utf-8")
        soup = self.soup(utf8_data, exclude_encodings=["utf-8"])
        assert "windows-1252" == soup.original_encoding

    def test_custom_builder_class(self):
        # Verify that you can pass in a custom Builder class and
        # it'll be instantiated with the appropriate keyword arguments.
        class Mock(object):
            def __init__(self, **kwargs):
                self.called_with = kwargs
                self.is_xml = True
                self.store_line_numbers = False
                self.cdata_list_attributes = []
                self.preserve_whitespace_tags = []
                self.string_containers = {}
            def initialize_soup(self, soup):
                pass
            def feed(self, markup):
                self.fed = markup
            def reset(self):
                pass
            def ignore(self, ignore):
                pass
            set_up_substitutions = can_be_empty_element = ignore
            def prepare_markup(self, *args, **kwargs):
                yield "prepared markup", "original encoding", "declared encoding", "contains replacement characters"
                
        kwargs = dict(
            var="value",
            # This is a deprecated BS3-era keyword argument, which
            # will be stripped out.
            convertEntities=True,
        )
        with warnings.catch_warnings(record=True):
            soup = BeautifulSoup('', builder=Mock, **kwargs)
        assert isinstance(soup.builder, Mock)
        assert dict(var="value") == soup.builder.called_with
        assert "prepared markup" == soup.builder.fed
        
        # You can also instantiate the TreeBuilder yourself. In this
        # case, that specific object is used and any keyword arguments
        # to the BeautifulSoup constructor are ignored.
        builder = Mock(**kwargs)
        with warnings.catch_warnings(record=True) as w:
            soup = BeautifulSoup(
                '', builder=builder, ignored_value=True,
            )
        msg = str(w[0].message)
        assert msg.startswith("Keyword arguments to the BeautifulSoup constructor will be ignored.")
        assert builder == soup.builder
        assert kwargs == builder.called_with

    def test_parser_markup_rejection(self):
        # If markup is completely rejected by the parser, an
        # explanatory ParserRejectedMarkup exception is raised.
        class Mock(TreeBuilder):
            def feed(self, *args, **kwargs):
                raise ParserRejectedMarkup("Nope.")

        def prepare_markup(self, *args, **kwargs):
            # We're going to try two different ways of preparing this markup,
            # but feed() will reject both of them.
            yield markup, None, None, False
            yield markup, None, None, False
            

        import re
        with pytest.raises(ParserRejectedMarkup) as exc_info:
            BeautifulSoup('', builder=Mock)
        assert "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help." in str(exc_info.value)
        
    def test_cdata_list_attributes(self):
        # Most attribute values are represented as scalars, but the
        # HTML standard says that some attributes, like 'class' have
        # space-separated lists as values.
        markup = '<a id=" an id " class=" a class "></a>'
        soup = self.soup(markup)

        # Note that the spaces are stripped for 'class' but not for 'id'.
        a = soup.a
        assert " an id " == a['id']
        assert ["a", "class"] == a['class']

        # TreeBuilder takes an argument called 'mutli_valued_attributes'  which lets
        # you customize or disable this. As always, you can customize the TreeBuilder
        # by passing in a keyword argument to the BeautifulSoup constructor.
        soup = self.soup(markup, builder=default_builder, multi_valued_attributes=None)
        assert " a class " == soup.a['class']

        # Here are two ways of saying that `id` is a multi-valued
        # attribute in this context, but 'class' is not.
        for switcheroo in ({'*': 'id'}, {'a': 'id'}):
            with warnings.catch_warnings(record=True) as w:
                # This will create a warning about not explicitly
                # specifying a parser, but we'll ignore it.
                soup = self.soup(markup, builder=None, multi_valued_attributes=switcheroo)
            a = soup.a
            assert ["an", "id"] == a['id']
            assert " a class " == a['class']

    def test_replacement_classes(self):
        # Test the ability to pass in replacements for element classes
        # which will be used when building the tree.
        class TagPlus(Tag):
            pass

        class StringPlus(NavigableString):
            pass

        class CommentPlus(Comment):
            pass
        
        soup = self.soup(
            "<a><b>foo</b>bar</a><!--whee-->",
            element_classes = {
                Tag: TagPlus,
                NavigableString: StringPlus,
                Comment: CommentPlus,
            }
        )

        # The tree was built with TagPlus, StringPlus, and CommentPlus objects,
        # rather than Tag, String, and Comment objects.
        assert all(
            isinstance(x, (TagPlus, StringPlus, CommentPlus))
            for x in soup.recursiveChildGenerator()
        )

    def test_alternate_string_containers(self):
        # Test the ability to customize the string containers for
        # different types of tags.
        class PString(NavigableString):
            pass

        class BString(NavigableString):
            pass

        soup = self.soup(
            "<div>Hello.<p>Here is <b>some <i>bolded</i></b> text",
            string_containers = {
                'b': BString,
                'p': PString,
            }
        )

        # The string before the <p> tag is a regular NavigableString.
        assert isinstance(soup.div.contents[0], NavigableString)
        
        # The string inside the <p> tag, but not inside the <i> tag,
        # is a PString.
        assert isinstance(soup.p.contents[0], PString)

        # Every string inside the <b> tag is a BString, even the one that
        # was also inside an <i> tag.
        for s in soup.b.strings:
            assert isinstance(s, BString)

        # Now that parsing was complete, the string_container_stack
        # (where this information was kept) has been cleared out.
        assert [] == soup.string_container_stack


class TestWarnings(SoupTest):

    def _assert_warning(self, warnings, cls):
        for w in warnings:
            if isinstance(w.message, cls):
                return w
        raise Exception("%s warning not found in %r" % (cls, warnings))
    
    def _assert_no_parser_specified(self, w):
        warning = self._assert_warning(w, GuessedAtParserWarning)
        message = str(warning.message)
        assert message.startswith(BeautifulSoup.NO_PARSER_SPECIFIED_WARNING[:60])

    def test_warning_if_no_parser_specified(self):
        with warnings.catch_warnings(record=True) as w:
            soup = BeautifulSoup("<a><b></b></a>")
        self._assert_no_parser_specified(w)

    def test_warning_if_parser_specified_too_vague(self):
        with warnings.catch_warnings(record=True) as w:
            soup = BeautifulSoup("<a><b></b></a>", "html")
        self._assert_no_parser_specified(w)

    def test_no_warning_if_explicit_parser_specified(self):
        with warnings.catch_warnings(record=True) as w:
            soup = BeautifulSoup("<a><b></b></a>", "html.parser")
        assert [] == w

    def test_parseOnlyThese_renamed_to_parse_only(self):
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup("<a><b></b></a>", parseOnlyThese=SoupStrainer("b"))
        msg = str(w[0].message)
        assert "parseOnlyThese" in msg
        assert "parse_only" in msg
        assert b"<b></b>" == soup.encode()

    def test_fromEncoding_renamed_to_from_encoding(self):
        with warnings.catch_warnings(record=True) as w:
            utf8 = b"\xc3\xa9"
            soup = self.soup(utf8, fromEncoding="utf8")
        msg = str(w[0].message)
        assert "fromEncoding" in msg
        assert "from_encoding" in msg
        assert "utf8" == soup.original_encoding

    def test_unrecognized_keyword_argument(self):
        with pytest.raises(TypeError):
            self.soup("<a>", no_such_argument=True)

    @pytest.mark.parametrize(
        "extension",
        ['markup.html', 'markup.htm', 'markup.HTML', 'markup.txt',
         'markup.xhtml', 'markup.xml', "/home/user/file", "c:\\user\file"]
    )
    def test_resembles_filename_warning(self, extension):
        # A warning is issued if the "markup" looks like the name of
        # an HTML or text file, or a full path to a file on disk.
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup("markup" + extension)
            warning = self._assert_warning(w, MarkupResemblesLocatorWarning)
            assert "looks more like a filename" in str(warning.message)

    @pytest.mark.parametrize(
        "extension",
        ['markuphtml', 'markup.com', '', 'markup.js']
    )
    def test_resembles_filename_no_warning(self, extension):
        # The 'looks more like a filename' warning is not issued if
        # the markup looks like a bare string, a domain name, or a
        # file that's not an HTML file.
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup("markup" + extension)
        assert [] == w
        
    def test_url_warning_with_bytes_url(self):
        url = b"http://www.crummybytes.com/"
        with warnings.catch_warnings(record=True) as warning_list:
            soup = self.soup(url)
        warning = self._assert_warning(
            warning_list, MarkupResemblesLocatorWarning
        )
        assert "looks more like a URL" in str(warning.message)
        assert url not in str(warning.message).encode("utf8")
        
    def test_url_warning_with_unicode_url(self):
        url = "http://www.crummyunicode.com/"
        with warnings.catch_warnings(record=True) as warning_list:
            # note - this url must differ from the bytes one otherwise
            # python's warnings system swallows the second warning
            soup = self.soup(url)
        warning = self._assert_warning(
            warning_list, MarkupResemblesLocatorWarning
        )
        assert "looks more like a URL" in str(warning.message)
        assert url not in str(warning.message)

    def test_url_warning_with_bytes_and_space(self):
        # Here the markup contains something besides a URL, so no warning
        # is issued.
        with warnings.catch_warnings(record=True) as warning_list:
            soup = self.soup(b"http://www.crummybytes.com/ is great")
        assert not any("looks more like a URL" in str(w.message) 
                       for w in warning_list)

    def test_url_warning_with_unicode_and_space(self):
        with warnings.catch_warnings(record=True) as warning_list:
            soup = self.soup("http://www.crummyunicode.com/ is great")
        assert not any("looks more like a URL" in str(w.message) 
                       for w in warning_list)


class TestSelectiveParsing(SoupTest):

    def test_parse_with_soupstrainer(self):
        markup = "No<b>Yes</b><a>No<b>Yes <c>Yes</c></b>"
        strainer = SoupStrainer("b")
        soup = self.soup(markup, parse_only=strainer)
        assert soup.encode() == b"<b>Yes</b><b>Yes <c>Yes</c></b>"

        
class TestNewTag(SoupTest):
    """Test the BeautifulSoup.new_tag() method."""
    def test_new_tag(self):
        soup = self.soup("")
        new_tag = soup.new_tag("foo", bar="baz", attrs={"name": "a name"})
        assert isinstance(new_tag, Tag)
        assert "foo" == new_tag.name
        assert dict(bar="baz", name="a name") == new_tag.attrs
        assert None == new_tag.parent
        
    def test_tag_inherits_self_closing_rules_from_builder(self):
        if LXML_PRESENT:
            xml_soup = BeautifulSoup("", "lxml-xml")
            xml_br = xml_soup.new_tag("br")
            xml_p = xml_soup.new_tag("p")

            # Both the <br> and <p> tag are empty-element, just because
            # they have no contents.
            assert b"<br/>" == xml_br.encode()
            assert b"<p/>" == xml_p.encode()

        html_soup = BeautifulSoup("", "html.parser")
        html_br = html_soup.new_tag("br")
        html_p = html_soup.new_tag("p")

        # The HTML builder users HTML's rules about which tags are
        # empty-element tags, and the new tags reflect these rules.
        assert b"<br/>" == html_br.encode()
        assert b"<p></p>" == html_p.encode()

class TestNewString(SoupTest):
    """Test the BeautifulSoup.new_string() method."""
    def test_new_string_creates_navigablestring(self):
        soup = self.soup("")
        s = soup.new_string("foo")
        assert "foo" == s
        assert isinstance(s, NavigableString)

    def test_new_string_can_create_navigablestring_subclass(self):
        soup = self.soup("")
        s = soup.new_string("foo", Comment)
        assert "foo" == s
        assert isinstance(s, Comment)


class TestPickle(SoupTest):
   # Test our ability to pickle the BeautifulSoup object itself.

    def test_normal_pickle(self):
        soup = self.soup("<a>some markup</a>")
        pickled = pickle.dumps(soup)
        unpickled = pickle.loads(pickled)
        assert "some markup" == unpickled.a.string
        
    def test_pickle_with_no_builder(self):
        # We had a bug that prevented pickling from working if
        # the builder wasn't set.
        soup = self.soup("some markup")
        soup.builder = None
        pickled = pickle.dumps(soup)
        unpickled = pickle.loads(pickled)
        assert "some markup" == unpickled.string

class TestEncodingConversion(SoupTest):
    # Test Beautiful Soup's ability to decode and encode from various
    # encodings.

    def setup_method(self):
        self.unicode_data = '<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>'
        self.utf8_data = self.unicode_data.encode("utf-8")
        # Just so you know what it looks like.
        assert self.utf8_data == b'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\xc3\xa9 bleu!</foo></body></html>'

    def test_ascii_in_unicode_out(self):
        # ASCII input is converted to Unicode. The original_encoding
        # attribute is set to 'utf-8', a superset of ASCII.
        chardet = dammit.chardet_dammit
        logging.disable(logging.WARNING)
        try:
            def noop(str):
                return None
            # Disable chardet, which will realize that the ASCII is ASCII.
            dammit.chardet_dammit = noop
            ascii = b"<foo>a</foo>"
            soup_from_ascii = self.soup(ascii)
            unicode_output = soup_from_ascii.decode()
            assert isinstance(unicode_output, str)
            assert unicode_output == self.document_for(ascii.decode())
            assert soup_from_ascii.original_encoding.lower() == "utf-8"
        finally:
            logging.disable(logging.NOTSET)
            dammit.chardet_dammit = chardet

    def test_unicode_in_unicode_out(self):
        # Unicode input is left alone. The original_encoding attribute
        # is not set.
        soup_from_unicode = self.soup(self.unicode_data)
        assert soup_from_unicode.decode() == self.unicode_data
        assert soup_from_unicode.foo.string == 'Sacr\xe9 bleu!'
        assert soup_from_unicode.original_encoding == None

    def test_utf8_in_unicode_out(self):
        # UTF-8 input is converted to Unicode. The original_encoding
        # attribute is set.
        soup_from_utf8 = self.soup(self.utf8_data)
        assert soup_from_utf8.decode() == self.unicode_data
        assert soup_from_utf8.foo.string == 'Sacr\xe9 bleu!'

    def test_utf8_out(self):
        # The internal data structures can be encoded as UTF-8.
        soup_from_unicode = self.soup(self.unicode_data)
        assert soup_from_unicode.encode('utf-8') == self.utf8_data

    @skipIf(
        PYTHON_3_PRE_3_2,
        "Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
    def test_attribute_name_containing_unicode_characters(self):
        markup = '<div><a \N{SNOWMAN}="snowman"></a></div>'
        assert self.soup(markup).div.encode("utf8") == markup.encode("utf8")
Give it up for day 1 of smooth brain shenanigans! https://pbs.twimg.com/media/E1bxikWWEAEGYHU.png 2022-12-01 16:50:29 +00:00			`# -- coding: utf-8 --`
			`"""Tests of Beautiful Soup as a whole."""`

			`from pdb import set_trace`
			`import logging`
			`import os`
			`import pickle`
			`import pytest`
			`import sys`
			`import tempfile`

			`from bs4 import (`
			`BeautifulSoup,`
			`BeautifulStoneSoup,`
			`GuessedAtParserWarning,`
			`MarkupResemblesLocatorWarning,`
			`dammit,`
			`)`
			`from bs4.builder import (`
			`builder_registry,`
			`TreeBuilder,`
			`ParserRejectedMarkup,`
			`)`
			`from bs4.element import (`
			`Comment,`
			`SoupStrainer,`
			`Tag,`
			`NavigableString,`
			`)`

			`from . import (`
			`default_builder,`
			`SoupTest,`
			`skipIf,`
			`)`
			`import warnings`

			`try:`
			`from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML`
			`LXML_PRESENT = True`
			`except ImportError as e:`
			`LXML_PRESENT = False`

			`PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))`

			`class TestConstructor(SoupTest):`

			`def test_short_unicode_input(self):`
			`data = "<h1>éé</h1>"`
			`soup = self.soup(data)`
			`assert "éé" == soup.h1.string`

			`def test_embedded_null(self):`
			`data = "<h1>foo\0bar</h1>"`
			`soup = self.soup(data)`
			`assert "foo\0bar" == soup.h1.string`

			`def test_exclude_encodings(self):`
			`utf8_data = "Räksmörgås".encode("utf-8")`
			`soup = self.soup(utf8_data, exclude_encodings=["utf-8"])`
			`assert "windows-1252" == soup.original_encoding`

			`def test_custom_builder_class(self):`
			`# Verify that you can pass in a custom Builder class and`
			`# it'll be instantiated with the appropriate keyword arguments.`
			`class Mock(object):`
			`def __init__(self, **kwargs):`
			`self.called_with = kwargs`
			`self.is_xml = True`
			`self.store_line_numbers = False`
			`self.cdata_list_attributes = []`
			`self.preserve_whitespace_tags = []`
			`self.string_containers = {}`
			`def initialize_soup(self, soup):`
			`pass`
			`def feed(self, markup):`
			`self.fed = markup`
			`def reset(self):`
			`pass`
			`def ignore(self, ignore):`
			`pass`
			`set_up_substitutions = can_be_empty_element = ignore`
			`def prepare_markup(self, args, *kwargs):`
			`yield "prepared markup", "original encoding", "declared encoding", "contains replacement characters"`

			`kwargs = dict(`
			`var="value",`
			`# This is a deprecated BS3-era keyword argument, which`
			`# will be stripped out.`
			`convertEntities=True,`
			`)`
			`with warnings.catch_warnings(record=True):`
			`soup = BeautifulSoup('', builder=Mock, **kwargs)`
			`assert isinstance(soup.builder, Mock)`
			`assert dict(var="value") == soup.builder.called_with`
			`assert "prepared markup" == soup.builder.fed`

			`# You can also instantiate the TreeBuilder yourself. In this`
			`# case, that specific object is used and any keyword arguments`
			`# to the BeautifulSoup constructor are ignored.`
			`builder = Mock(**kwargs)`
			`with warnings.catch_warnings(record=True) as w:`
			`soup = BeautifulSoup(`
			`'', builder=builder, ignored_value=True,`
			`)`
			`msg = str(w[0].message)`
			`assert msg.startswith("Keyword arguments to the BeautifulSoup constructor will be ignored.")`
			`assert builder == soup.builder`
			`assert kwargs == builder.called_with`

			`def test_parser_markup_rejection(self):`
			`# If markup is completely rejected by the parser, an`
			`# explanatory ParserRejectedMarkup exception is raised.`
			`class Mock(TreeBuilder):`
			`def feed(self, args, *kwargs):`
			`raise ParserRejectedMarkup("Nope.")`

			`def prepare_markup(self, args, *kwargs):`
			`# We're going to try two different ways of preparing this markup,`
			`# but feed() will reject both of them.`
			`yield markup, None, None, False`
			`yield markup, None, None, False`


			`import re`
			`with pytest.raises(ParserRejectedMarkup) as exc_info:`
			`BeautifulSoup('', builder=Mock)`
			`assert "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help." in str(exc_info.value)`

			`def test_cdata_list_attributes(self):`
			`# Most attribute values are represented as scalars, but the`
			`# HTML standard says that some attributes, like 'class' have`
			`# space-separated lists as values.`
			`markup = '<a id=" an id " class=" a class "></a>'`
			`soup = self.soup(markup)`

			`# Note that the spaces are stripped for 'class' but not for 'id'.`
			`a = soup.a`
			`assert " an id " == a['id']`
			`assert ["a", "class"] == a['class']`

			`# TreeBuilder takes an argument called 'mutli_valued_attributes' which lets`
			`# you customize or disable this. As always, you can customize the TreeBuilder`
			`# by passing in a keyword argument to the BeautifulSoup constructor.`
			`soup = self.soup(markup, builder=default_builder, multi_valued_attributes=None)`
			`assert " a class " == soup.a['class']`

			# Here are two ways of saying that `id` is a multi-valued
			`# attribute in this context, but 'class' is not.`
			`for switcheroo in ({'*': 'id'}, {'a': 'id'}):`
			`with warnings.catch_warnings(record=True) as w:`
			`# This will create a warning about not explicitly`
			`# specifying a parser, but we'll ignore it.`
			`soup = self.soup(markup, builder=None, multi_valued_attributes=switcheroo)`
			`a = soup.a`
			`assert ["an", "id"] == a['id']`
			`assert " a class " == a['class']`

			`def test_replacement_classes(self):`
			`# Test the ability to pass in replacements for element classes`
			`# which will be used when building the tree.`
			`class TagPlus(Tag):`
			`pass`

			`class StringPlus(NavigableString):`
			`pass`

			`class CommentPlus(Comment):`
			`pass`

			`soup = self.soup(`
			`"<a><b>foo</b>bar</a><!--whee-->",`
			`element_classes = {`
			`Tag: TagPlus,`
			`NavigableString: StringPlus,`
			`Comment: CommentPlus,`
			`}`
			`)`

			`# The tree was built with TagPlus, StringPlus, and CommentPlus objects,`
			`# rather than Tag, String, and Comment objects.`
			`assert all(`
			`isinstance(x, (TagPlus, StringPlus, CommentPlus))`
			`for x in soup.recursiveChildGenerator()`
			`)`

			`def test_alternate_string_containers(self):`
			`# Test the ability to customize the string containers for`
			`# different types of tags.`
			`class PString(NavigableString):`
			`pass`

			`class BString(NavigableString):`
			`pass`

			`soup = self.soup(`
			`"<div>Hello.<p>Here is <b>some <i>bolded</i></b> text",`
			`string_containers = {`
			`'b': BString,`
			`'p': PString,`
			`}`
			`)`

			`# The string before the <p> tag is a regular NavigableString.`
			`assert isinstance(soup.div.contents[0], NavigableString)`

			`# The string inside the <p> tag, but not inside the <i> tag,`
			`# is a PString.`
			`assert isinstance(soup.p.contents[0], PString)`

			`# Every string inside the <b> tag is a BString, even the one that`
			`# was also inside an <i> tag.`
			`for s in soup.b.strings:`
			`assert isinstance(s, BString)`

			`# Now that parsing was complete, the string_container_stack`
			`# (where this information was kept) has been cleared out.`
			`assert [] == soup.string_container_stack`


			`class TestWarnings(SoupTest):`

			`def _assert_warning(self, warnings, cls):`
			`for w in warnings:`
			`if isinstance(w.message, cls):`
			`return w`
			`raise Exception("%s warning not found in %r" % (cls, warnings))`

			`def _assert_no_parser_specified(self, w):`
			`warning = self._assert_warning(w, GuessedAtParserWarning)`
			`message = str(warning.message)`
			`assert message.startswith(BeautifulSoup.NO_PARSER_SPECIFIED_WARNING[:60])`

			`def test_warning_if_no_parser_specified(self):`
			`with warnings.catch_warnings(record=True) as w:`
			`soup = BeautifulSoup("<a><b></b></a>")`
			`self._assert_no_parser_specified(w)`

			`def test_warning_if_parser_specified_too_vague(self):`
			`with warnings.catch_warnings(record=True) as w:`
			`soup = BeautifulSoup("<a><b></b></a>", "html")`
			`self._assert_no_parser_specified(w)`

			`def test_no_warning_if_explicit_parser_specified(self):`
			`with warnings.catch_warnings(record=True) as w:`
			`soup = BeautifulSoup("<a><b></b></a>", "html.parser")`
			`assert [] == w`

			`def test_parseOnlyThese_renamed_to_parse_only(self):`
			`with warnings.catch_warnings(record=True) as w:`
			`soup = self.soup("<a><b></b></a>", parseOnlyThese=SoupStrainer("b"))`
			`msg = str(w[0].message)`
			`assert "parseOnlyThese" in msg`
			`assert "parse_only" in msg`
			`assert b"<b></b>" == soup.encode()`

			`def test_fromEncoding_renamed_to_from_encoding(self):`
			`with warnings.catch_warnings(record=True) as w:`
			`utf8 = b"\xc3\xa9"`
			`soup = self.soup(utf8, fromEncoding="utf8")`
			`msg = str(w[0].message)`
			`assert "fromEncoding" in msg`
			`assert "from_encoding" in msg`
			`assert "utf8" == soup.original_encoding`

			`def test_unrecognized_keyword_argument(self):`
			`with pytest.raises(TypeError):`
			`self.soup("<a>", no_such_argument=True)`

			`@pytest.mark.parametrize(`
			`"extension",`
			`['markup.html', 'markup.htm', 'markup.HTML', 'markup.txt',`
			`'markup.xhtml', 'markup.xml', "/home/user/file", "c:\\user\file"]`
			`)`
			`def test_resembles_filename_warning(self, extension):`
			`# A warning is issued if the "markup" looks like the name of`
			`# an HTML or text file, or a full path to a file on disk.`
			`with warnings.catch_warnings(record=True) as w:`
			`soup = self.soup("markup" + extension)`
			`warning = self._assert_warning(w, MarkupResemblesLocatorWarning)`
			`assert "looks more like a filename" in str(warning.message)`

			`@pytest.mark.parametrize(`
			`"extension",`
			`['markuphtml', 'markup.com', '', 'markup.js']`
			`)`
			`def test_resembles_filename_no_warning(self, extension):`
			`# The 'looks more like a filename' warning is not issued if`
			`# the markup looks like a bare string, a domain name, or a`
			`# file that's not an HTML file.`
			`with warnings.catch_warnings(record=True) as w:`
			`soup = self.soup("markup" + extension)`
			`assert [] == w`

			`def test_url_warning_with_bytes_url(self):`
			`url = b"http://www.crummybytes.com/"`
			`with warnings.catch_warnings(record=True) as warning_list:`
			`soup = self.soup(url)`
			`warning = self._assert_warning(`
			`warning_list, MarkupResemblesLocatorWarning`
			`)`
			`assert "looks more like a URL" in str(warning.message)`
			`assert url not in str(warning.message).encode("utf8")`

			`def test_url_warning_with_unicode_url(self):`
			`url = "http://www.crummyunicode.com/"`
			`with warnings.catch_warnings(record=True) as warning_list:`
			`# note - this url must differ from the bytes one otherwise`
			`# python's warnings system swallows the second warning`
			`soup = self.soup(url)`
			`warning = self._assert_warning(`
			`warning_list, MarkupResemblesLocatorWarning`
			`)`
			`assert "looks more like a URL" in str(warning.message)`
			`assert url not in str(warning.message)`

			`def test_url_warning_with_bytes_and_space(self):`
			`# Here the markup contains something besides a URL, so no warning`
			`# is issued.`
			`with warnings.catch_warnings(record=True) as warning_list:`
			`soup = self.soup(b"http://www.crummybytes.com/ is great")`
			`assert not any("looks more like a URL" in str(w.message)`
			`for w in warning_list)`

			`def test_url_warning_with_unicode_and_space(self):`
			`with warnings.catch_warnings(record=True) as warning_list:`
			`soup = self.soup("http://www.crummyunicode.com/ is great")`
			`assert not any("looks more like a URL" in str(w.message)`
			`for w in warning_list)`


			`class TestSelectiveParsing(SoupTest):`

			`def test_parse_with_soupstrainer(self):`
			`markup = "No<b>Yes</b><a>No<b>Yes <c>Yes</c></b>"`
			`strainer = SoupStrainer("b")`
			`soup = self.soup(markup, parse_only=strainer)`
			`assert soup.encode() == b"<b>Yes</b><b>Yes <c>Yes</c></b>"`


			`class TestNewTag(SoupTest):`
			`"""Test the BeautifulSoup.new_tag() method."""`
			`def test_new_tag(self):`
			`soup = self.soup("")`
			`new_tag = soup.new_tag("foo", bar="baz", attrs={"name": "a name"})`
			`assert isinstance(new_tag, Tag)`
			`assert "foo" == new_tag.name`
			`assert dict(bar="baz", name="a name") == new_tag.attrs`
			`assert None == new_tag.parent`

			`def test_tag_inherits_self_closing_rules_from_builder(self):`
			`if LXML_PRESENT:`
			`xml_soup = BeautifulSoup("", "lxml-xml")`
			`xml_br = xml_soup.new_tag("br")`
			`xml_p = xml_soup.new_tag("p")`

			`# Both the <br> and <p> tag are empty-element, just because`
			`# they have no contents.`
			`assert b"<br/>" == xml_br.encode()`
			`assert b"<p/>" == xml_p.encode()`

			`html_soup = BeautifulSoup("", "html.parser")`
			`html_br = html_soup.new_tag("br")`
			`html_p = html_soup.new_tag("p")`

			`# The HTML builder users HTML's rules about which tags are`
			`# empty-element tags, and the new tags reflect these rules.`
			`assert b"<br/>" == html_br.encode()`
			`assert b"<p></p>" == html_p.encode()`

			`class TestNewString(SoupTest):`
			`"""Test the BeautifulSoup.new_string() method."""`
			`def test_new_string_creates_navigablestring(self):`
			`soup = self.soup("")`
			`s = soup.new_string("foo")`
			`assert "foo" == s`
			`assert isinstance(s, NavigableString)`

			`def test_new_string_can_create_navigablestring_subclass(self):`
			`soup = self.soup("")`
			`s = soup.new_string("foo", Comment)`
			`assert "foo" == s`
			`assert isinstance(s, Comment)`


			`class TestPickle(SoupTest):`
			`# Test our ability to pickle the BeautifulSoup object itself.`

			`def test_normal_pickle(self):`
			`soup = self.soup("<a>some markup</a>")`
			`pickled = pickle.dumps(soup)`
			`unpickled = pickle.loads(pickled)`
			`assert "some markup" == unpickled.a.string`

			`def test_pickle_with_no_builder(self):`
			`# We had a bug that prevented pickling from working if`
			`# the builder wasn't set.`
			`soup = self.soup("some markup")`
			`soup.builder = None`
			`pickled = pickle.dumps(soup)`
			`unpickled = pickle.loads(pickled)`
			`assert "some markup" == unpickled.string`

			`class TestEncodingConversion(SoupTest):`
			`# Test Beautiful Soup's ability to decode and encode from various`
			`# encodings.`

			`def setup_method(self):`
			`self.unicode_data = '<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>'`
			`self.utf8_data = self.unicode_data.encode("utf-8")`
			`# Just so you know what it looks like.`
			`assert self.utf8_data == b'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\xc3\xa9 bleu!</foo></body></html>'`

			`def test_ascii_in_unicode_out(self):`
			`# ASCII input is converted to Unicode. The original_encoding`
			`# attribute is set to 'utf-8', a superset of ASCII.`
			`chardet = dammit.chardet_dammit`
			`logging.disable(logging.WARNING)`
			`try:`
			`def noop(str):`
			`return None`
			`# Disable chardet, which will realize that the ASCII is ASCII.`
			`dammit.chardet_dammit = noop`
			`ascii = b"<foo>a</foo>"`
			`soup_from_ascii = self.soup(ascii)`
			`unicode_output = soup_from_ascii.decode()`
			`assert isinstance(unicode_output, str)`
			`assert unicode_output == self.document_for(ascii.decode())`
			`assert soup_from_ascii.original_encoding.lower() == "utf-8"`
			`finally:`
			`logging.disable(logging.NOTSET)`
			`dammit.chardet_dammit = chardet`

			`def test_unicode_in_unicode_out(self):`
			`# Unicode input is left alone. The original_encoding attribute`
			`# is not set.`
			`soup_from_unicode = self.soup(self.unicode_data)`
			`assert soup_from_unicode.decode() == self.unicode_data`
			`assert soup_from_unicode.foo.string == 'Sacr\xe9 bleu!'`
			`assert soup_from_unicode.original_encoding == None`

			`def test_utf8_in_unicode_out(self):`
			`# UTF-8 input is converted to Unicode. The original_encoding`
			`# attribute is set.`
			`soup_from_utf8 = self.soup(self.utf8_data)`
			`assert soup_from_utf8.decode() == self.unicode_data`
			`assert soup_from_utf8.foo.string == 'Sacr\xe9 bleu!'`

			`def test_utf8_out(self):`
			`# The internal data structures can be encoded as UTF-8.`
			`soup_from_unicode = self.soup(self.unicode_data)`
			`assert soup_from_unicode.encode('utf-8') == self.utf8_data`

			`@skipIf(`
			`PYTHON_3_PRE_3_2,`
			`"Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")`
			`def test_attribute_name_containing_unicode_characters(self):`
			`markup = '<div><a \N{SNOWMAN}="snowman"></a></div>'`
			`assert self.soup(markup).div.encode("utf8") == markup.encode("utf8")`