[Python] HTML parse işlemi

from lxml import html

# Load the dictionary page and parse it into an lxml element tree.
# The encoding is given explicitly: the page holds Turkish characters, and
# relying on the platform's default codec makes the result machine-dependent.
# NOTE(review): assumes the export is UTF-8 -- adjust if the file differs.
with open(r'sozluk.html', "r", encoding="utf-8") as f:
    sayfa = f.read()
t = html.fromstring(sayfa)
# --- SAMPLE HTML ---
# <p class="Normal para-style-override-2" xml:lang="tr-TR"><span class="char-style-override-3" xml:lang="en-US">abaküs </span><span class="char-style-override-4" xml:lang="en-US">abacus</span></p>
# <p class="Normal para-style-override-2" xml:lang="tr-TR"><span class="char-style-override-3" xml:lang="en-US">abiyotik </span><span class="char-style-override-4" xml:lang="en-US">abiotic</span></p>
#
# Collect the two columns entry by entry instead of zipping two
# document-wide lists: an empty or missing span yields no text() node, which
# would otherwise shift every following pair by one position.
kelime1 = []
kelime2 = []
# Every element that directly contains at least one of the two span kinds
# is treated as one dictionary entry (the <p> elements in the sample above).
girdiler = t.xpath(
    '//*[span[@class="char-style-override-3"]'
    ' or span[@class="char-style-override-4"]]'
)
for girdi in girdiler:
    # A missing side becomes an empty string, so the row stays visible in
    # the output rather than silently pulling in the next entry's word.
    kelime1.append("".join(girdi.xpath('./span[@class="char-style-override-3"]/text()')))
    kelime2.append("".join(girdi.xpath('./span[@class="char-style-override-4"]/text()')))

# Print one numbered, semicolon-separated row per entry (numbering from 1).
# ii is pre-set so it is still defined (as 0) when no entry was found.
ii = 0
for ii, (x, y) in enumerate(zip(kelime1, kelime2), start=1):
    print(ii, ";{};{}".format(x, y))


RELATED POST

Your email address will not be published. Required fields are marked *