/ lib / lxml / html / __init__.pyc
__init__.pyc
  1  o

  2  a��cC�@s�dZddlmZgd�ZddlZddlZddlZddlmZz
  3  ddl	m
  4  Z
  5  mZWney9ddl
m
  6  Z
  7  mZYnwddlmZd	d
  8  lmZd	dlmZzddlmZWneycddlmZYnwzeWneyseZYnwzeWn
ey�eefZYnwd
d�ZdZejddeid�Zejddeid�Zejddeid�Z e�d�Z!e�d�Z"e�d�Z#e�$dej%�j&Z'e�$d�j&Z(ejddeid�Z)e�$d�Z*e�$dej%�j+Z,dd�Z-dd �Z.d!d"�Z/Gd#d$�d$e�Z0Gd%d&�d&e1�Z2Gd'd(�d(e1�Z3e3d)d*d+�Z4e3d,d*d+�Z5e3d-d.d+�Z6e3d/d.d+�Z7e3d0d*d+�Z8e3d1d.d+�Z9Gd2d3�d3e2ej:�Z;Gd4d5�d5e2ej<�Z=Gd6d7�d7e2ej>�Z?Gd8d9�d9e2ej@�ZAGd:d;�d;ejB�ZCe�$ed<�ej%�jDZEe�$d<�Fd=�ej%�jDZGd~d>d?�ZH		dd@dA�ZI		ddBdC�ZJd�dDdE�ZKd�dFdG�ZLdHdI�ZMdJdK�ZNGdLdM�dMe=�ZOeOeCjPdN<d�dOdP�ZQdQdR�ZRGdSdT�dTe
  9  �ZSGdUdV�dVe1�ZTGdWdX�dXe1�ZUGdYdZ�dZeUe=�ZVeVeCjPd[<Gd\d]�d]eUe=�ZWeWeCjPd^<Gd_d`�d`e�ZXGdadb�dbeY�ZZGdcdd�ddeY�Z[Gdedf�dfe�Z\Gdgdh�dheUe=�Z]e]eCjPdi<Gdjdk�dke=�Z^e^eCjPdl<dmdn�Z_dodp�Z`e�$dq�jaZbe�$dq�Fdr��jaZc	*	d�dtdu�Zdeedj�ed_d�dvdw�ZeGdxdy�dyejf�ZfGdzd{�d{ejg�Zhd|d}�Zief�Zjeh�ZkdS)�z.The ``lxml.html`` tool set for HTML handling.
 10  �)�absolute_import)�document_fromstring�fragment_fromstring�fragments_fromstring�
 11  fromstring�tostring�Element�defs�open_in_browser�submit_form�find_rel_links�
 12  find_class�make_links_absolute�resolve_base_href�	iterlinks�
rewrite_links�parseN)�partial)�MutableMapping�
 13  MutableSet�)�etree�)r	)�SetMixin��urljoincCsB|s|Stjddkrt�dtj�j}nt�dtj�j}|d|�S)Nr�z^(\s*)u'z^(\s*)b'z\1')�sys�version_info�re�compile�M�sub)�sr"�r$��C:\Users\Jacks.GUTTSPC\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\LocalCache\local-packages\Python310\site-packages\lxml\html\__init__.py�__fix_docstringKs
 14  r&zhttp://www.w3.org/1999/xhtmlz9descendant-or-self::a[@rel]|descendant-or-self::x:a[@rel]�x��
 15  namespacesz7descendant-or-self::option|descendant-or-self::x:optionz3descendant-or-self::form|descendant-or-self::x:formztdescendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), concat(' ', $class_name, ' '))]zdescendant-or-self::*[@id=$id]zstring()z&url\((["][^"]*["]|['][^']*[']|[^)]*)\)z@import "(.*?)"z%//label[@for=$id]|//x:label[@for=$id]z[^ ]+z%[^;=]*;\s*(?:url\s*=\s*)?(?P<url>.*)$cCs\|dd�dkr|dd�dks |dd�dkr*|dd�dkr*|dd�|dfS||fS)Nr�"������'r$)r#�posr$r$r%�_unquote_matchjs@r.cCs0t|t�rt|dd�St|t�rt|dd�S|S)z1Convert the result back into the input type.
 16      zutf-8)�encoding�unicode)�
 17  issubclass�bytesrr0)�typ�resultr$r$r%�_transform_resultqs
 18  
 19  
 20  r5cCs@t|t�r|ddkr|dtt�d�tkr|�d�dS|S)Nr�{r�}r+)�
 21  isinstance�
 22  basestring�len�XHTML_NAMESPACE�split)�tagr$r$r%�_nons|s
 23  $r>cs`eZdZdZdd�Zdd�Zdd�Z�fdd	�Zd
 24  d�Zdd
�Z	dd�Z
 25  dd�Zdd�Z�Z
S)�Classesa*Provides access to an element's class attribute as a set-like collection.
 26      Usage::
 27  
 28          >>> el = fromstring('<p class="hidden large">Text</p>')
 29          >>> classes = el.classes  # or: classes = Classes(el.attrib)
 30          >>> classes |= ['block', 'paragraph']
 31          >>> el.get('class')
 32          'hidden large block paragraph'
 33          >>> classes.toggle('hidden')
 34          False
 35          >>> el.get('class')
 36          'large block paragraph'
 37          >>> classes -= ('some', 'classes', 'block')
 38          >>> el.get('class')
 39          'large paragraph'
 40      cCs||_t|jdd�|_dS)N�class�)�_attributesr�get�_get_class_value)�self�
 41  attributesr$r$r%�__init__�szClasses.__init__cCsR|rt�d|�rtd|��|����}||vrdS|�|�d�|�|jd<dS)z[
 42          Add a class.
 43  
 44          This has no effect if the class is already present.
 45          �\s�Invalid class name: %rN� r@)r�search�
 46  ValueErrorrDr<�append�joinrB�rE�value�classesr$r$r%�add�s
 47  zClasses.addcsh�rt�d��rtd����fdd�|����D�}|r'd�|�|jd<dSd|jvr2|jd=dSdS)zn
 48          Remove a class if it is currently present.
 49  
 50          If the class is not present, do nothing.
 51          rHrIcsg|]}|�kr|�qSr$r$)�.0�name�rPr$r%�
 52  <listcomp>�s�z#Classes.discard.<locals>.<listcomp>rJr@N)rrKrLrDr<rNrBrOr$rUr%�discard�s
 53  �zClasses.discardcs0|rt�d|�rtd|��tt|��|�dS)zw
 54          Remove a class; it must currently be present.
 55  
 56          If the class is not present, raise a KeyError.
 57          rHrIN)rrKrL�superr?�remove�rErP��	__class__r$r%rY�szClasses.removecCs|��}||vo
||��vS�N)rDr<)rErTrQr$r$r%�__contains__�szClasses.__contains__cC�t|�����Sr])�iterrDr<�rEr$r$r%�__iter__��zClasses.__iter__cCr_r])r:rDr<rar$r$r%�__len__�rczClasses.__len__cCsL|����}d}|D]
}||vr|�|�d}q
 58  |r$d�|�|jd<dSdS)z.
 59          Add all names from 'values'.
 60          FTrJr@N)rDr<rMrNrB)rE�valuesrQ�extendedrPr$r$r%�update�s
 61  ��zClasses.updatecCs�|rt�d|�rtd|��|����}z	|�|�d}Wnty-|�|�d}Ynw|r:d�|�|jd<|S|jd=|S)z�
 62          Add a class name if it isn't there yet, or remove it if it exists.
 63  
 64          Returns true if the class was added (and is now enabled) and
 65          false if it was removed (and is now disabled).
 66          rHrIFTrJr@)	rrKrLrDr<rYrMrNrB)rErPrQ�enabledr$r$r%�toggle�s
 67  
 68  ��zClasses.toggle)�__name__�
 69  __module__�__qualname__�__doc__rGrRrWrYr^rbrdrgri�
__classcell__r$r$r[r%r?�s
 70  
r?cs�eZdZd+�fdd�	Zedd��Zejdd��Zedd��Zed	d
 71  ��Zedd��Z	ed
d��Z
 72  edd��Zejdd��Zejdd��Zdd�Z
dd�Zdd�Zdd�Zdd�Zdd�Zd,d d!�Z	"	d-d#d$�Zd+d%d&�Zd'd(�Z	"	d.d)d*�Z�ZS)/�	HtmlMixinNcstt|��||�dS)z�set(self, key, value=None)
 73  
 74          Sets an element attribute.  If no value is provided, or if the value is None,
 75          creates a 'boolean' attribute without value, e.g. "<form novalidate></form>"
 76          for ``form.set('novalidate')``.
 77          N)rXro�set)rE�keyrPr[r$r%rp�sz
HtmlMixin.setcC�
 78  t|j�S)zB
 79          A set-like wrapper around the 'class' attribute.
 80          )r?�attribrar$r$r%rQ��
 81  zHtmlMixin.classescCsHt|t�sJ�|��}|r|�d|�dS|�d�dur"|jd=dSdS)Nr@)r8r?rDrprCrs)rErQrPr$r$r%rQs�cCs|��jjS)z�
 82          Returns the base URL, given when the page was parsed.
 83  
 84          Use with ``urlparse.urljoin(el.base_url, href)`` to get
 85          absolute URLs.
 86          )Zgetroottree�docinfo�URLrar$r$r%�base_url
 87  szHtmlMixin.base_urlcC�t|�S)z0
 88          Return a list of all the forms
 89          )�_forms_xpathrar$r$r%�formsszHtmlMixin.formscC�|jddtid�dS)zt
 90          Return the <body> element.  Can be called from a child element
 91          to get the document's head.
 92          z//body|//x:bodyr'r(r��xpathr;rar$r$r%�body�zHtmlMixin.bodycCr{)zu
 93          Returns the <head> element.  Can be called from a child
 94          element to get the document's head.
 95          z//head|//x:headr'r(rr|rar$r$r%�head#rzHtmlMixin.headcCs.|�d�}|s	dSt||d�}|sdS|dS)zN
 96          Get or set any <label> element associated with this element.
 97          �idN�r�r)rC�_label_xpath)rEr�r4r$r$r%�label+s
 98  zHtmlMixin.labelcCsD|�d�}|s
td|��t|j�dkrtd|��|�d|�dS)Nr�z9You cannot set a label for an element (%r) that has no idr�z5You can only assign label to a label element (not %r)�for)rC�	TypeErrorr>r=rp)rEr�r�r$r$r%r�9s
 99  ����cCs|j}|dur
|jd=dSdS)Nr�)r�rs)rEr�r$r$r%r�Fs�cCs^|��}|dus
100  J�|jr(|��}|dur|jpd|j|_n	|jp#d|j|_|�|�dS)z�
101          Removes this element from the tree, including its children and
102          text.  The tail text is joined to the previous element or
103          parent.
104          NrA)�	getparent�tail�getprevious�textrY)rE�parent�previousr$r$r%�	drop_treeLszHtmlMixin.drop_treecCs�|��}|dus
105  J�|��}|jr.t|jt�r.|dur%|jpd|j|_n	|jp)d|j|_|jrZt|�rC|d}|jp=d|j|_n|durQ|jpKd|j|_n	|jpUd|j|_|�|�}|dd�|||d�<dS)a]
106          Remove the tag, but not its children or text.  The children and text
107          are merged into the parent.
108  
109          Example::
110  
111              >>> h = fragment_fromstring('<div>Hello <b>World!</b></div>')
112              >>> h.find('.//b').drop_tag()
113              >>> print(tostring(h, encoding='unicode'))
114              <div>Hello World!</div>
115          NrAr+r)	r�r�r�r8r=r9r�r:�index)rEr�r��lastr�r$r$r%�drop_tag\s 
116  zHtmlMixin.drop_tagcs�����fdd�t|�D�S)z]
117          Find any links like ``<a rel="{rel}">...</a>``; returns a list of elements.
118          cs"g|]
}|�d����kr|�qS��rel�rC�lower�rS�elr�r$r%rV�s�z,HtmlMixin.find_rel_links.<locals>.<listcomp>)r��_rel_links_xpath)rEr�r$r�r%r|szHtmlMixin.find_rel_linkscCst||d�S)z>
119          Find any elements with the given class name.
120          )�
121  class_name)�_class_xpath)rEr�r$r$r%r
�szHtmlMixin.find_classcGs:z	t||d�dWSty|r|dYSt|��w)a�
122          Get the first element in a document with the given id.  If none is
123          found, return the default argument if provided or raise KeyError
124          otherwise.
125  
126          Note that there can be more than one element with the same id,
127          and this isn't uncommon in HTML documents found in the wild.
128          Browsers return only the first match, and this function does
129          the same.
130          r�r)�	_id_xpath�
131  IndexError�KeyError)rEr��defaultr$r$r%�get_element_by_id�s�zHtmlMixin.get_element_by_idcCrx)zT
132          Return the text content of the tag (and the text in any children).
133          )�_collect_string_contentrar$r$r%�text_content�szHtmlMixin.text_content�htmlcCsddlm}|||d�|�S)a
134          Run the CSS expression on this element and its children,
135          returning a list of the results.
136  
137          Equivalent to lxml.cssselect.CSSSelect(expr, translator='html')(self)
138          -- note that pre-compiling the expression can provide a substantial
139          speedup.
140          r)�CSSSelector)�
141  translator)�lxml.cssselectr�)rE�exprr�r�r$r$r%�	cssselect�s
142  zHtmlMixin.cssselectTcs��dur|j��durtd��|r|��|dkr �fdd�}n|dkr+�fdd�}n|dur6�fdd�}ntd	|��|�|�dS)
143  a�
144          Make all links in the document absolute, given the
145          ``base_url`` for the document (the full URL where the document
146          came from), or if no ``base_url`` is given, then the ``.base_url``
147          of the document.
148  
149          If ``resolve_base_href`` is true, then any ``<base href>``
150          tags in the document are used *and* removed from the document.
151          If it is false then any such tag is ignored.
152  
153          If ``handle_failures`` is None (default), a failure to process
154          a URL will abort the processing.  If set to 'ignore', errors
155          are ignored.  If set to 'discard', failing URLs will be removed.
156          Nz3No base_url given, and the document has no base_url�ignorecs$zt�|�WSty|YSwr]�rrL��href�rwr$r%�	link_repl�s
157  �z0HtmlMixin.make_links_absolute.<locals>.link_replrWcs"zt�|�WStyYdSwr]r�r�r�r$r%r��s
158  �cs
159  t�|�Sr]rr�r�r$r%r���
160  z(unexpected value for handle_failures: %r)rwr�rrLr)rErwr�handle_failuresr�r$r�r%r�s$��zHtmlMixin.make_links_absolutecCsNd}|jddtid�}|D]}|�d�}|��q
|sdS|j|d|d�dS)a�
161          Find any ``<base href>`` tag in the document, and apply its
162          values to all links found in the document.  Also remove the
163          tag once it has been applied.
164  
165          If ``handle_failures`` is None (default), a failure to process
166          a URL will abort the processing.  If set to 'ignore', errors
167          are ignored.  If set to 'discard', failing URLs will be removed.
168          Nz//base[@href]|//x:base[@href]r'r(r�F)rr�)r}r;rCr�r)rEr��	base_hrefZbasetags�br$r$r%r�s
169  �
170  
171  
172  �zHtmlMixin.resolve_base_hrefccs��tj}|�tj�D�]7}|j}t|j�}|dkrpd}d|vr+|�d�}|d|dfVdD]}||vrH|�|�}|durAt	||�}|||dfVq-d|vrot
173  �|�d��D]}|�d�}|duret	||�}|d||�
�fVqUn|D]}||vr�||||dfVqr|dkr�|�dd	���}	|	d
174  kr�|�dd	�}
175  t|
176  �}|r�|�d�n|
177  ��}|r�t||r�|�
d�n|
178  �|��\}}|d||fVnQ|d
kr�|�d�p�d	}
|
��dkr�|d|�d�dfVn5|dk�r|j�rdd�t|j�D�dd�t|j�D�}|�r|jdd�|D]\}}|d||fV�qd|v�rBtt|d��}|�rB|ddd�D]}t|�d�|�
d��\}}|d||fV�q*q
179  dS)a
180          Yield (element, attribute, link, pos), where attribute may be None
181          (indicating the link is in the text).  ``pos`` is the position
182          where the link occurs; often 0, but sometimes something else in
183          the case of links in stylesheets or style tags.
184  
185          Note: <base href> is *not* taken into account in any way.  The
186          link you get is exactly the link in the document.
187  
188          Note: multiple links inside of a single text string or
189          attribute value are returned in reversed order.  This makes it
190          possible to replace or delete them from the text string value
191          based on their reported text positions.  Otherwise, a
192          modification at one text position can change the positions of
193          links reported later on.
194          �objectN�codebaser)�classid�data�archive�metaz
195  http-equivrA�refresh�content�url�param�	valuetype�refrP�stylecSs,g|]}t|�d�|�d��ddd��qS)rNr+)r.�group�start�rS�matchr$r$r%rV;s��z'HtmlMixin.iterlinks.<locals>.<listcomp>cSs g|]}|�d�|�d�f�qS)r)r�r�r�r$r$r%rV?s��T)�reverser+r)r	�
196  link_attrsr`rrrsr>r=rCr�_archive_re�finditerr�r�r��_parse_meta_refresh_url�stripr.�findr��_iter_css_urls�_iter_css_imports�sort�list)rEr�r��attribsr=r�rsrPr��
197  http_equivr�r�r-r��urlsr�r$r$r%r�s��
198  
199  
200  
201  �
202  
203  ��������
204  ��zHtmlMixin.iterlinkscCs|dur|j||d�n|r|��|��D]h\}}}}||���}||kr'q|dur8|dur3d|_n|j|=q|durT|jd|�||j|t|�d�}	|	|_q|�|�}
205  |sft|
206  �t|�krf|}	n|
207  d|�||
208  |t|�d�}	|�||	�qdS)a�
209          Rewrite all the links in the document.  For each link
210          ``link_repl_func(link)`` will be called, and the return value
211          will replace the old link.
212  
213          Note that links may not be absolute (unless you first called
214          ``make_links_absolute()``), and may be internal (e.g.,
215          ``'#anchor'``).  They can also be values like
216          ``'mailto:email'`` or ``'javascript:expr'``.
217  
218          If you give ``base_href`` then all links passed to
219          ``link_repl_func()`` will take that into account.
220  
221          If the ``link_repl_func`` returns None, the attribute or
222          tag text will be removed completely.
223          N)rrA)	rrrr�r�rsr:rCrp)rEZlink_repl_funcrr�r�rs�linkr-Znew_link�new�curr$r$r%rRs0�(
224  $�zHtmlMixin.rewrite_linksr])r�)NTN)TN)rjrkrlrp�propertyrQ�setterrwrzr~r�r��deleterr�r�rr
r�r�r�rrrrrnr$r$r[r%ro�sF	
225  
226  
227  	
228  
229  
230  
231  
232  
233   
234  
235236  -Z�roc@s&eZdZdZdefdd�Zdd�ZdS)�_MethodFunca5
237      An object that represents a method on an element as a function;
238      the function takes either an element or an HTML string.  It
239      returns whatever the function normally returns, or if the function
240      works in-place (and so returns None) it returns a serialized form
241      of the resulting document.
242      FcCs ||_||_t||j�j|_dSr])rT�copy�getattrrm)rErTr�Zsource_classr$r$r%rG�sz_MethodFunc.__init__cOs�t|�}t|t�rd|vrtd|j��t|fi|��}nd|vr'|�d�}n|j}|r1t�|�}t	||j�}||i|��}|durGt
243  ||�S|S)Nr�zQThe keyword 'copy' can only be used with element inputs to %s, not a string input)�typer8r9r�rTr�popr��deepcopyr�r5)rE�doc�args�kw�result_typeZmake_a_copy�methr4r$r$r%�__call__�s"
244  �
245  
246  z_MethodFunc.__call__N)rjrkrlrmrorGr�r$r$r$r%r��sr�rF)r�r
rTrrrc@�eZdZdS)�HtmlCommentN�rjrkrlr$r$r$r%r���r�c@r�)�HtmlElementNr�r$r$r$r%r��r�r�c@r�)�HtmlProcessingInstructionNr�r$r$r$r%r��r�r�c@r�)�
247  HtmlEntityNr�r$r$r$r%r��r�r�c@s&eZdZdZiZddd�Zdd�ZdS)�HtmlElementClassLookupavA lookup scheme for HTML Element classes.
248  
249      To create a lookup instance with different Element classes, pass a tag
250      name mapping of Element classes in the ``classes`` keyword argument and/or
251      a tag name mapping of Mixin classes in the ``mixins`` keyword argument.
252      The special key '*' denotes a Mixin class that should be mixed into all
253      Element classes.
254      Nc
255  Cs�tj�|�|dur|j��}|rWi}|D]"\}}|dkr.|��D]}|�|g��|�q!q|�|g��|�q|��D]\}}|�	|t
256  �}t||g�}	t|j
|	i�||<q<||_dS)N�*)r�CustomElementClassLookuprG�_default_element_classesr��keys�
257  setdefaultrM�itemsrCr��tupler�rj�_element_classes)
258  rErQ�mixinsZmixersrTrP�nZ	mix_basesr��basesr$r$r%rG�s 
259  �
260  zHtmlElementClassLookup.__init__cCsB|dkr
|j�|��t�S|dkrtS|dkrtS|dkrtSdS)N�element�comment�PI�entity)r�rCr�r�r�r�r�)rE�	node_type�document�	namespacerTr$r$r%�lookup�szHtmlElementClassLookup.lookup�NN)rjrkrlrmr�rGr�r$r$r$r%r��s
261  
262  r�z^\s*<(?:html|!doctype)�asciicKsx|durt}tj||fi|��}|durt�d��|r*|�d�dur*|�dtd��|r:|�d�dur:|�td��|S)NzDocument is emptyr�rr~)�html_parserrr�ParserErrorr��insertrrM)r��parserZensure_head_bodyr�rPr$r$r%r�s�rc	Ks�|durt}t|t�rt|�sd�d�|d�d�}nt|�s$d|}t|f||d�|��}t|j�dks8J�dd	�|D�}t	|�d
263  ksMJd||f��|d}g}|re|j
264  re|j
265  ��ret�
d
|j
266  ��|j
267  rs|j
268  ��rs|�|j
269  �|�|�|S)aRParses several HTML elements, returning a list of elements.
270  
271      The first item in the list may be a string.
272      If no_leading_text is true, then it will be an error if there is
273      leading text, and it will always be a list of only elements.
274  
275      base_url will set the document's base_url attribute
276      (and the tree's docinfo.URL).
277      Nz<html><body>r�z</body></html>z<html><body>%s</body></html>�rrwr�cSsg|]}t|j�dkr|�qS)r~)r>r=�rS�er$r$r%rVsz(fragments_fromstring.<locals>.<listcomp>rztoo many bodies: %r in %rrzThere is leading text: %r)r�r8r2�_looks_like_full_html_bytes�encode�_looks_like_full_html_unicoderr>r=r:r�r�rrrM�extend)	r��no_leading_textrwrr�r��bodiesr~�elementsr$r$r%rs0
278  ���
279  rc	Ks�|durt}t|�}t|f|||d�|��}|r<t|t�s d}t|�}|r:t|dt�r5|d|_|d=|�|�|S|sCt�	d��t
280  |�dkrXt�	dd�d	d
281  �|D����|d}|jrl|j�
�rlt�	d|j��d|_|S)a
282  
283      Parses a single HTML element; it is an error if there is more than
284      one element, or if anything but whitespace precedes or follows the
285      element.
286  
287      If ``create_parent`` is true (or is a tag name) then a parent node
288      will be created to encapsulate the HTML in a single element.  In this
289      case, leading or trailing text is also allowed, as are multiple elements
290      as result of the parsing.
291  
292      Passing a ``base_url`` will set the document's ``base_url`` attribute
293      (and the tree's docinfo.URL).
294      N)rr
295  rw�divrzNo elements foundrzMultiple elements found (%s)�, cS�g|]}t|��qSr$)�
_element_namerr$r$r%rVQ�z'fragment_fromstring.<locals>.<listcomp>zElement followed by text: %r)r��boolrr8r9rr�r	rrr:rNr�r�)	r�Z
create_parentrwrr�Zaccept_leading_textr�new_rootr�r$r$r%r)sH���
296  
297  
298  
299  ���rcKs�|durt}t|t�rt|�}nt|�}t|f||d�|��}|r#|S|�d�}|s1|�dt�}|rn|d}t|�dkrm|dd�D])}|j	rct|�rZ|dj
300  pRd|j	|d_
301  n	|j	p^d|j	|_	|�|�|��qCnd}|�d	�}	|	s~|�d
302  t�}	|	r�|	d}
303  t|	�dkr�|	dd�D]}|
304  �|�|��q�|S|dur�|St|�dkr�|j	r�|j	�
�s�|dj
305  r�|dj
306  �
�s�|dSt|�r�d|_|Sd|_|S)
a
307      Parse the html, returning a single element/document.
308  
309      This tries to minimally parse the chunk of text, without knowing if it
310      is a fragment or a document.
311  
312      base_url will set the document's base_url attribute (and the tree's docinfo.URL)
313      Nrr~z{%s}bodyrrr+rAr�z{%s}headr
�span)r�r8r2rrr�findallr;r:r�r�r	r�r��_contains_block_level_tagr=)r�rwrr�Zis_full_htmlr�rr~Z
314  other_body�headsr�Z
315  other_headr$r$r%rZs\	
316  
317  
318  
319  
320  �
321  
322  
323  ���rcKs$|durt}tj||fd|i|��S)a=
324      Parse a filename, URL, or file-like object into an HTML document
325      tree.  Note: this returns a tree, not an element.  Use
326      ``parse(...).getroot()`` to get the document root.
327  
328      You can override the base URL with the ``base_url`` keyword.  This
329      is most useful when parsing from a file-like object.
330      Nrw)r�rr)Zfilename_or_urlrrwr�r$r$r%r�s	rcCs,|�tj�D]
}t|j�tjvrdSqdS�NTF)r`rrr>r=r	�
331  block_tags�r�r$r$r%r�s
332  �rcCs(t|tj�rdSt|t�rdSt|j�S)Nr��string)r8r�CommentBaser9r>r=rr$r$r%r�s
333  
334  
335  rc@s�eZdZdZedd��Zedd��Zejdd��Zdd�Zd	d
336  �Z	edd��Z
337  e
338  jd
d��Z
339  e
340  jdd��Z
341  edd��Zejdd��ZdS)�FormElementz&
342      Represents a <form> element.
343      cCrx)z�
344          Returns an accessor for all the input elements in the form.
345  
346          See `InputGetter` for more information about the object.
347          )�InputGetterrar$r$r%�inputs�szFormElement.inputscCrr)z�
348          Dictionary-like object that represents all the fields in this
349          form.  You can set values in this dictionary to effect the
350          form.
351          )�
352  FieldsDictrrar$r$r%�fields�s
353  zFormElement.fieldscCsZ|j}|��}|��D]\}}||vr|�|�|||<q|D]}|dur&qd||<qdSr])r!r�r�rY)rErPr!Z	prev_keysrqr$r$r%r!�s
354  
355  
356  �cCsb|�d�r
357  |�d�S|�d�rd|�d�S|jj}t|d��}|s*t|dt��}t|�|��S)NrTr��#�formz{%s}form)rCr~r`r�r;�strr�)rEZ	iter_tagsrzr$r$r%�_name�s
358  
359  
360  zFormElement._namecCs�g}|jD]i}|j}|rd|jvrqt|j�}|dkr$|�||jf�q|dkrH|j}|jr;|D]	}|�||f�q0q|durG|�||jf�q|dksRJd|��|jrY|j	sYq|j
361  dvr_q|j}|durn|�||jf�q|S)z�
362          Return a list of tuples of the field values for the form.
363          This is suitable to be passed to ``urllib.urlencode()``.
364          �disabled�textarea�selectN�inputzUnexpected tag: %r)�submit�image�reset�file)rrTrsr>r=rMrP�multiple�	checkable�checkedr�)rE�resultsr�rTr=rP�vr$r$r%�form_values�s:
365  
366  ��
367  �
368  �zFormElement.form_valuescCs*|j}|�d�}|r|durt||�S|S)z:
369          Get/set the form's ``action`` attribute.
370          �actionN)rwrCr)rErwr4r$r$r%r4s
371  
372  
373  zFormElement.actioncC�|�d|�dS�Nr4�rprZr$r$r%r4�cC�|j}d|vr|d=dSdSr6�rs�rErsr$r$r%r4#�
374  �cC�|�dd���S)zt
375          Get/set the form's method.  Always returns a capitalized
376          string, and defaults to ``'GET'``
377          �method�GET)rC�upperrar$r$r%r>)szFormElement.methodcCs|�d|���dS)Nr>)rpr@rZr$r$r%r>1sN)
rjrkrlrmr�rr!r�r%r3r4r�r>r$r$r$r%r�s(
378  
379  
380   
381  
382  
383  
384  rr#cCsV|��}|rt|d�r|��}|�|�|durt}|jr!|j}n|j}||j||�S)a%
385      Helper function to submit a form.  Returns a file-like object, as from
386      ``urllib.urlopen()``.  This object also has a ``.geturl()`` function,
387      which shows the URL if there were any redirects.
388  
389      You can use this like::
390  
391          form = doc.forms[0]
392          form.inputs['foo'].value = 'bar' # etc
393          response = form.submit()
394          doc = parse(response)
395          doc.make_links_absolute(response.geturl())
396  
397      To change the HTTP requester, pass a function as ``open_http`` keyword
398      argument that opens the URL for you.  The function must have the following
399      signature::
400  
401          open_http(method, URL, values)
402  
403      The action is one of 'GET' or 'POST', the URL is the target URL as a
404      string, and the values are a sequence of ``(name, value)`` tuples with the
405      form data.
406      r�N)r3�hasattrr�r	�open_http_urllibr4rwr>)r#Zextra_values�	open_httprer�r$r$r%r9s
407  
408  rcCs�|std��z
409  ddlm}m}Wnty%ddlm}ddlm}Ynw|dkr@d|vr3|d7}n|d7}|||�7}d}n||�}t|t�sN|�	d	�}|||�S)
410  Nzcannot submit, no URL providedr)�	urlencode�urlopen)rE)rDr?�?�&�ASCII)
411  rL�urllibrDrE�ImportError�urllib.request�urllib.parser8r2r)r>r�rerDrEr�r$r$r%rB_s$�
412  
413  
414  
415  rBc@sTeZdZdd�Zdd�Zdd�Zdd�Zd	d
416  �Zdd�Zd
d�Z	dd�Z
417  dd�ZdS)r cC�
418  ||_dSr]�r)rErr$r$r%rGxr�zFieldsDict.__init__cCs|j|jSr]�rrP�rE�itemr$r$r%�__getitem__zszFieldsDict.__getitem__cCs||j|_dSr]rO)rErQrPr$r$r%�__setitem__|rczFieldsDict.__setitem__cCstd��)Nz'You cannot remove keys from ElementDict)r�rPr$r$r%�__delitem__~s�zFieldsDict.__delitem__cCs
419  |j��Sr])rr�rar$r$r%r��r�zFieldsDict.keyscCs
420  ||jvSr]rNrPr$r$r%r^�r�zFieldsDict.__contains__cCst|j���Sr])r`rr�rar$r$r%rb�szFieldsDict.__iter__cCrrr])r:rrar$r$r%rd�r�zFieldsDict.__len__cCsd|jj|jj��fS�Nz<%s for form %s>)r\rjrr#r%rar$r$r%�__repr__�s
421  �zFieldsDict.__repr__N)rjrkrlrGrRrSrTr�r^rbrdrVr$r$r$r%r vsr c@sPeZdZdZdd�Zdd�Zdd�Zdd	�Zd
422  d�Zdd
�Z	dd�Z
423  dd�ZdS)ra[
424      An accessor that represents all the input fields in a form.
425  
426      You can get fields by name from this, with
427      ``form.inputs['field_name']``.  If there are a set of checkboxes
428      with the same name, they are returned as a list (a `CheckboxGroup`
429      which also allows value setting).  Radio inputs are handled
430      similarly.  Use ``.keys()`` and ``.items()`` to process all fields
431      in this way.
432  
433      You can also iterate over this to get all input elements.  This
434      won't return the same thing as if you get all the names, as
435      checkboxes and radio elements are returned individually.
436      cCrMr])r#)rEr#r$r$r%rG�r�zInputGetter.__init__cCsd|jj|j��fSrU)r\rjr#r%rar$r$r%rV���zInputGetter.__repr__cs��fdd�|D�}|std���|d�d�}|dkr+t|�dkr+t|�}�|_|S|dkr>t|�dkr>t|�}�|_|S|dS)	Ncsg|]	}|j�kr|�qSr$�rT)rS�fieldrXr$r%rV�sz+InputGetter.__getitem__.<locals>.<listcomp>z!No input element with the name %rrr��radior�checkbox)r�rCr:�
437  RadioGrouprT�
CheckboxGroup)rErTr!Z
438  input_typer�r$rXr%rR�szInputGetter.__getitem__cCs|D]
439  }|j|krdSqdSrrX)rErTrYr$r$r%r^�s
440  
441  �zInputGetter.__contains__cCs:g}dh}|D]}|j}||vr|�|�|�|�q|S)zx
442          Returns all unique field names, in document order.
443  
444          :return: A list of all unique field names.
445          N)rTrMrR)rE�names�seenr�rTr$r$r%r��s
446  
447  �zInputGetter.keyscCsBg}t�}|D]}|j}||vr|�|�|�|||f�q|S)z�
448          Returns all fields with their names, similar to dict.items().
449  
450          :return: A list of (name, field) tuples.
451          )rprTrRrM)rEr�r_r�rTr$r$r%r��s
452  �zInputGetter.itemscCs|j�ddd�S)Nr(r)r')r#r`rar$r$r%rb�rczInputGetter.__iter__cCstdd�|D��S)Ncss�|]}dVqdS)rNr$)rS�_r$r$r%�	<genexpr>�s�z&InputGetter.__len__.<locals>.<genexpr>)�sumrar$r$r%rd�szInputGetter.__len__N)rjrkrlrmrGrVrRr^r�r�rbrdr$r$r$r%r�src@s@eZdZdZedd��Zejdd��Zejdd��Zdd�ZdS)	�
453  InputMixinzE
454      Mix-in for all input elements (input, select, and textarea)
455      cCs
456  |�d�S)z1
457          Get/set the name of the element
458          rT�rCrar$r$r%rT�rtzInputMixin.namecCr5�NrTr7rZr$r$r%rT�r8cCr9rer:r;r$r$r%rT�r<cCs8t|dd�}|r
d|}nd}d|jjt|�|j|fS)Nr�z type=%rrAz<%s %x name=%r%s>)r�r\rjr�rT)rE�	type_namer$r$r%rV�s
459  �zInputMixin.__repr__N)	rjrkrlrmr�rTr�r�rVr$r$r$r%rc�s
460  
461  
462  rcc@�8eZdZdZedd��Zejdd��Zejdd��ZdS)�TextareaElementzp
463      ``<textarea>`` element.  You can get the name with ``.name`` and
464      get/set the value with ``.value``
465      cCsF|jpd}|j�dt�rd}nd}|D]}|tj||dd�7}q|S)zK
466          Get/set the value (which is the contents of this element)
467          rA�{%s}�xmlr�r0�r>r/)r�r=�
468  startswithr;rr)rEr�Zserialisation_methodr�r$r$r%rP
s
469  
470  �zTextareaElement.valuecCs|dd�=||_dSr]�r�rZr$r$r%rPs
471  
472  cCsd|_|dd�=dS)NrArmrar$r$r%rP"sN)rjrkrlrmr�rPr�r�r$r$r$r%rhs
473  
474  rhr'c@s^eZdZdZedd��Zejdd��Zejdd��Zedd��Zedd	��Z	e	jd
475  d	��Z	dS)�
SelectElementa�
476      ``<select>`` element.  You can get the name with ``.name``.
477  
478      ``.value`` will be the value of the selected option, unless this
479      is a multi-select element (``<select multiple>``), in which case
480      it will be a set-like object.  In either case ``.value_options``
481      gives the possible values.
482  
483      The boolean attribute ``.multiple`` shows if this is a
484      multi-select.
485      cCs�|jrt|�St|�}z
tdd�t|�D��}Wn ty8ztdd�|D��}Wnty5YYdSwYnw|�d�}|durI|jpFd��}|S)z�
486          Get/set the value of this select (the selected option).
487  
488          If this is a multi-select, this is a set-like object that
489          represents all the selected options.
490          css"�|]}|�d�dur|VqdS)�selectedNrdr�r$r$r%raD�� z&SelectElement.value.<locals>.<genexpr>css"�|]}|�d�dur|VqdS)r&Nrdr�r$r$r%raGrpNrPrA)	r.�MultipleSelectOptions�_options_xpath�next�reversed�
StopIterationrCr�r�)rE�optionsZselected_optionrPr$r$r%rP7s"���
491  zSelectElement.valuecCs�|jrt|t�rtd��|j}|��|�|�dSd}|durEt|�D]}|�d�}|dur6|j	p3d�
492  �}||kr>|}nq$td|��t|�D]}d|jvrT|jd=qI|dura|�
dd�dSdS)NzYou must pass in a sequencerPrAz'There is no option with the value of %rro)r.r8r9r�rP�clearrgrrrCr�r�rLrsrp)rErPre�checked_optionr��	opt_valuer$r$r%rPOs6
493  
494  
495  ��
496  ��cCs|jr
497  |j��dSd|_dSr])r.rPrwrar$r$r%rPjs
498  cCs@g}t|�D]}|�d�}|dur|jpd��}|�|�q|S)z�
499          All the possible values this select can have (the ``value``
500          attribute of all the ``<option>`` elements.
501          rPNrA)rrrCr�r�rM)rErvr�rPr$r$r%�
value_optionsrs
502  zSelectElement.value_optionscCs
503  d|jvS)zW
504          Boolean attribute: is there a ``multiple`` attribute on this element.
505          r.r:rar$r$r%r.�rtzSelectElement.multiplecCs.|r
506  |�dd�dSd|jvr|jd=dSdS)Nr.rA)rprsrZr$r$r%r.�s
507  
508  �N)
509  rjrkrlrmr�rPr�r�rzr.r$r$r$r%rn+s
510  
511  
512  
513  
514  rnr(c@sDeZdZdZdd�Zedd��Zdd�Zdd	�Zd
515  d�Z	dd
�Z
516  dS)rqz�
517      Represents all the selected options in a ``<select multiple>`` element.
518  
519      You can add to this set-like option to select an option, or remove
520      to unselect the option.
521      cCrMr])r()rEr(r$r$r%rG�r�zMultipleSelectOptions.__init__cCstt|j��S)z<
522          Iterator of all the ``<option>`` elements.
523          )r`rrr(rar$r$r%rv��zMultipleSelectOptions.optionsccsB�|jD]}d|jvr|�d�}|dur|jpd��}|VqdS)NrorPrA)rvrsrCr�r�)rE�optionryr$r$r%rb�s�
524  
525  
526  ��zMultipleSelectOptions.__iter__cCsR|jD]}|�d�}|dur|jpd��}||kr"|�dd�dSqtd|��)NrPrAroz$There is no option with the value %r)rvrCr�r�rprL�rErQr|ryr$r$r%rR�s
527  
528  ��zMultipleSelectOptions.addcCsd|jD](}|�d�}|dur|jpd��}||kr+d|jvr%|jd=dStd|��qtd|��)NrPrAroz'The option %r is not currently selectedz%There is not option with the value %r)rvrCr�r�rsrLr}r$r$r%rY�s
529  
530  
531  ����zMultipleSelectOptions.removecC�&d|jjd�dd�|D��|jjfS)Nz<%s {%s} for select name=%r>rcSrr$��repr�rSr2r$r$r%rV�rz2MultipleSelectOptions.__repr__.<locals>.<listcomp>)r\rjrNr(rTrar$r$r%rV��
532  �zMultipleSelectOptions.__repr__N)rjrkrlrmrGr�rvrbrRrYrVr$r$r$r%rq�s
533  rqc@�LeZdZdZedd��Zejdd��Zejdd��Zedd��Zdd	�Z	d
534  S)r\a
535      This object represents several ``<input type=radio>`` elements
536      that have the same name.
537  
538      You can use this like a list, but also use the property
539      ``.value`` to check/uncheck inputs.  Also you can use
540      ``.value_options`` to get the possible values.
541      cCs&|D]}d|jvr|�d�SqdS)zs
542          Get/set the value, which checks the radio with that value (and
543          unchecks any other value).
544          r0rPN�rsrC)rEr�r$r$r%rP�s
545  
546  �zRadioGroup.valuecCspd}|dur|D]
}|�d�|kr|}nqtd|��|D]}d|jvr)|jd=q|dur6|�dd�dSdS)NrPz)There is no radio input with the value %rr0rA)rCrLrsrp)rErPrxr�r$r$r%rP�s�
547  ��cCs
548  d|_dSr]rUrar$r$r%rP�s
549  cC�dd�|D�S)�<
550          Returns a list of all the possible values.
551          cS�g|]}|�d��qSrUrdr�r$r$r%rV��z,RadioGroup.value_options.<locals>.<listcomp>r$rar$r$r%rz�r{zRadioGroup.value_optionscC�d|jjt�|�fS�Nz%s(%s)�r\rjr�rVrar$r$r%rV�rWzRadioGroup.__repr__N�
552  rjrkrlrmr�rPr�r�rzrVr$r$r$r%r\�s
553  
554  
555  
556  
557  r\c@r�)r]aS
558      Represents a group of checkboxes (``<input type=checkbox>``) that
559      have the same name.
560  
561      In addition to using this like a list, the ``.value`` attribute
562      returns a set-like object that you can add to or remove from to
563      check and uncheck checkboxes.  You can also use ``.value_options``
564      to get the possible values.
565      cCrx)z�
566          Return a set-like object that can be modified to check or
567          uncheck individual checkboxes according to their value.
568          )�CheckboxValuesrar$r$r%rPszCheckboxGroup.valuecCs<|j}|��t|d�std|dj|f��|�|�dS)Nrbz<A CheckboxGroup (name=%r) must be set to a sequence (not %r)r)rPrwrArLrTrg)rErPrer$r$r%rPs
569  ��cCs|j��dSr])rPrwrar$r$r%rP scCr�)r�cSr�rUrdr�r$r$r%rV)r�z/CheckboxGroup.value_options.<locals>.<listcomp>r$rar$r$r%rz$r{zCheckboxGroup.value_optionscCr�r�r�rar$r$r%rV+s�zCheckboxGroup.__repr__Nr�r$r$r$r%r]s	
570  
571  	
572  
573  r]c@s8eZdZdZdd�Zdd�Zdd�Zdd	�Zd
574  d�ZdS)
r�zj
575      Represents the values of the checked checkboxes in a group of
576      checkboxes with the same name.
577      cCrMr])r�)rEr�r$r$r%rG6r�zCheckboxValues.__init__cCstdd�|jD��S)NcSs g|]}d|jvr|�d��qS)r0rPr�r�r$r$r%rV:s
578  
579  �z+CheckboxValues.__iter__.<locals>.<listcomp>)r`r�rar$r$r%rb9s�zCheckboxValues.__iter__cCs8|jD]}|�d�|kr|�dd�dSqtd|��)NrPr0rA�No checkbox with value %r)r�rCrpr��rErPr�r$r$r%rR?s
580  �zCheckboxValues.addcCsJ|jD]}|�d�|krd|jvr|jd=dStd|��qtd|��)NrPr0z0The checkbox with value %r was already uncheckedr�)r�rCrsr�r�r$r$r%rYGs
581  
582  ����zCheckboxValues.removecCr~)Nz <%s {%s} for checkboxes name=%r>rcSrr$rr�r$r$r%rVWrz+CheckboxValues.__repr__.<locals>.<listcomp>)r\rjrNr�rTrar$r$r%rVTr�zCheckboxValues.__repr__N)	rjrkrlrmrGrbrRrYrVr$r$r$r%r�0s
r�c@sxeZdZdZedd��Zejdd��Zejdd��Zedd��Zejdd��Zed	d
583  ��Z	edd��Z
584  e
585  jd
d��Z
586  dS)�InputElementaZ
587      Represents an ``<input>`` element.
588  
589      You can get the type with ``.type`` (which is lower-cased and
590      defaults to ``'text'``).
591  
592      Also you can get and set the value with ``.value``
593  
594      Checkboxes and radios have the attribute ``input.checkable ==
595      True`` (for all others it is false) and a boolean attribute
596      ``.checked``.
597  
598      cCs(|jr|jr
|�d�pdSdS|�d�S)z�
599          Get/set the value of this element, using the ``value`` attribute.
600  
601          Also, if this is a checkbox and it has no value, this defaults
602          to ``'on'``.  If it is a checkbox or radio that is not
603          checked, this returns None.
604          rP�onN)r/r0rCrar$r$r%rPks
605  	
606  zInputElement.valuecCsH|jr|s
607  d|_dSd|_t|t�r|�d|�dSdS|�d|�dS)NFTrP)r/r0r8r9rprZr$r$r%rP{s
608  
609  �cCs*|jrd|_dSd|jvr|jd=dSdS)NFrP)r/r0rsrar$r$r%rP�s
610  
611  
612  �cCr=)zM
613          Return the type of this element (using the type attribute).
614          r�r�r�rar$r$r%r��szInputElement.typecCr5)Nr�r7rZr$r$r%r��r8cCs
615  |jdvS)z7
616          Boolean: can this element be checked?
617          )r[rZ)r�rar$r$r%r/�rtzInputElement.checkablecCs|jstd��d|jvS)z�
618          Boolean attribute to get/set the presence of the ``checked``
619          attribute.
620  
621          You can only use this on checkable input types.
622          �Not a checkable input typer0)r/�AttributeErrorrsrar$r$r%r0�s
623  zInputElement.checkedcCs>|jstd��|r|�dd�dS|j}d|vr|d=dSdS)Nr�r0rA)r/r�rprs)rErPrsr$r$r%r0�s
624  �N)rjrkrlrmr�rPr�r�r�r/r0r$r$r$r%r�[s$
625  
626  
627  
628  
629  
630  
631  r�r)c@rg)�LabelElementz�
632      Represents a ``<label>`` element.
633  
634      Label elements are linked to other elements with their ``for``
635      attribute.  You can access this element with ``label.for_element``.
636      cCs|�d�}|s	dS|j�|�S)zf
637          Get/set the element this label points to.  Return None if it
638          can't be found.
639          r�N)rCr~r�)rEr�r$r$r%�for_element�s
640  zLabelElement.for_elementcCs*|�d�}|s
td|��|�d|�dS)Nr�zElement %r has no id attributer�)rCr�rp)rE�otherr�r$r$r%r��s
641  �cCr9)Nr�r:r;r$r$r%r��r<N)rjrkrlrmr�r�r�r�r$r$r$r%r��s
642  
643  
644  r�r�cCsZz|��}Wn	tyYnwdt}|�tj�D]}|j}|ddkr*|||_qdS)zYConvert all tags in an HTML tree to XHTML by moving them to the
645      XHTML namespace.
646      rirr6N)�getrootr�r;r`rrr=)r��prefixr�r=r$r$r%�
html_to_xhtml�s�
647  ��r�cCsXz|��}Wn	tyYnwdt}t|�}|�|d�D]
648  }|j|d�|_qdS)zUConvert all tags in an XHTML tree to HTML by removing their
649      XHTML namespace.
650      rir�N)r�r�r;r:r`r=)�xhtmlr��
651  prefix_lenr�r$r$r%�
xhtml_to_html�s��r�z%<meta http-equiv="Content-Type"[^>]*>rHr�cCsJtj||||||d�}|dkr#|s#t|t�rtd|�}|Stt�|�}|S)a$
652  Return an HTML string representation of the document.
653  
654      Note: if include_meta_content_type is true this will create a
655      ``<meta http-equiv="Content-Type" ...>`` tag in the head;
656      regardless of the value of include_meta_content_type any existing
657      ``<meta http-equiv="Content-Type" ...>`` tag will be removed
658  
659      The ``encoding`` argument controls the output encoding (defaults to
660      ASCII, with &#...; character references for any characters outside
661      of ASCII).  Note that you can pass the name ``'unicode'`` as
662      ``encoding`` argument to serialise to a Unicode string.
663  
664      The ``method`` argument defines the output method.  It defaults to
665      'html', but can also be 'xml' for xhtml output, or 'text' to
666      serialise to plain text without markup.
667  
668      To leave out the tail text of the top-level element that is being
669      serialised, pass ``with_tail=False``.
670  
671      The ``doctype`` option allows passing in a plain string that will
672      be serialised before the XML tree.  Note that passing in non
673      well-formed content here will make the XML output non well-formed.
674      Also, an existing doctype in the document tree will not be removed
675      when serialising an ElementTree instance.
676  
677      Example::
678  
679          >>> from lxml import html
680          >>> root = html.fragment_fromstring('<p>Hello<br>world!</p>')
681  
682          >>> html.tostring(root)
683          b'<p>Hello<br>world!</p>'
684          >>> html.tostring(root, method='html')
685          b'<p>Hello<br>world!</p>'
686  
687          >>> html.tostring(root, method='xml')
688          b'<p>Hello<br/>world!</p>'
689  
690          >>> html.tostring(root, method='text')
691          b'Helloworld!'
692  
693          >>> html.tostring(root, method='text', encoding='unicode')
694          u'Helloworld!'
695  
696          >>> root = html.fragment_fromstring('<div><p>Hello<br>world!</p>TAIL</div>')
697          >>> html.tostring(root[0], method='text', encoding='unicode')
698          u'Helloworld!TAIL'
699  
700          >>> html.tostring(root[0], method='text', encoding='unicode', with_tail=False)
701          u'Helloworld!'
702  
703          >>> doc = html.document_fromstring('<p>Hello<br>world!</p>')
704          >>> html.tostring(doc, method='html', encoding='unicode')
705          u'<html><body><p>Hello<br>world!</p></body></html>'
706  
707          >>> print(html.tostring(doc, method='html', encoding='unicode',
708          ...          doctype='<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'
709          ...                  ' "http://www.w3.org/TR/html4/strict.dtd">'))
710          <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
711          <html><body><p>Hello<br>world!</p></body></html>
712      )r>�pretty_printr/�	with_tail�doctyper�rA)rrr8r$�__str_replace_meta_content_type�!__bytes_replace_meta_content_typer2)r�r�Zinclude_meta_content_typer/r>r�r�r�r$r$r%r	s
713  ?�
714  
715  �rc	Cs�ddl}ddl}ddl}t|tj�st�|�}|jdd�\}}|�|d�}z|j	|d|p1|j
716  jp1dd�W|��n|��wd	|�
|jjd
717  �}t|�|�|�dS)z�
718      Open the HTML document in a web browser, saving it to a temporary
719      file to open it.  Note that this does not delete the file after
720      use.  This is mainly meant for debugging.
721      rNz.html)�suffix�wbr�zUTF-8rkzfile://�/)�os�
722  webbrowser�tempfiler8rZ_ElementTree�ElementTree�mkstemp�fdopen�writerur/�close�replace�path�sep�print�open)	r�r/r�r�r��handle�fn�fr�r$r$r%r
723  Vs
724  r
725  c� eZdZdZ�fdd�Z�ZS)�
726  HTMLParserzOAn HTML parser that is configured to return lxml.html Element
727      objects.
728      c�&tt|�jdi|��|�t��dS�Nr$)rXr�rG�set_element_class_lookupr��rE�kwargsr[r$r%rGu�zHTMLParser.__init__�rjrkrlrmrGrnr$r$r[r%r�qsr�cr�)�XHTMLParsera(An XML parser that is configured to return lxml.html Element
729      objects.
730  
731      Note that this parser is not really XHTML aware unless you let it
732      load a DTD that declares the HTML entities.  To do this, make sure
733      you have the XHTML DTDs installed in your catalogs, and create the
734      parser like this::
735  
736          >>> parser = XHTMLParser(load_dtd=True)
737  
738      If you additionally want to validate the document, use this::
739  
740          >>> parser = XHTMLParser(dtd_validation=True)
741  
742      For catalog support, see http://www.xmlsoft.org/catalog.html.
743      cr�r�)rXr�rGr�r�r�r[r$r%rG�r�zXHTMLParser.__init__r�r$r$r[r%r�zsr�cOstj|i|��}|S)zOCreate a new HTML Element.
744  
745      This can also be used for XHTML documents.
746      )r��makeelement)r�r�r2r$r$r%r�sr)NF)FNNr�)FFNr�TNr])lrm�
747  __future__r�__all__r�rr�	functoolsr�collections.abcrrrJ�collectionsrArr	Z	_setmixinr�urlparserrLr0�	NameErrorr$r9r2r&r;�XPathr�rrryr�r�r�r �Ir�r�r�r�r�rKr�r.r5r>r?r�ror�rr
rrrrrr�ZElementBaser�ZPIBaser�Z
748  EntityBaser�r�r�r�rrrrrrrrrrrr�rrBr rrcrhrnrqr�r\r]r�r�r�r�r�r"r�r�rr
749  r��	XMLParserr�rr��xhtml_parserr$r$r$r%�<module>s����
750  ���
751  
752  
753  �
754  ��l$.
755  ����
756  
757  �'
758759  1
760  D	
761  t
762  &Y
763   
764  d=5,+
765  ^
766  !����
767  �J
768  		
769