from lxml import html
# Read the exported dictionary page. The encoding is passed explicitly so that
# non-ASCII letters (e.g. the "ü" in "abaküs") are decoded the same way on
# every platform instead of depending on the locale's default encoding.
# NOTE(review): assumes sozluk.html is UTF-8 encoded -- confirm against the export.
with open(r'sozluk.html', "r", encoding="utf-8") as f:
    sayfa = f.read()
t = html.fromstring(sayfa)

# --- SAMPLE HTML ---
# <p class="Normal para-style-override-2" xml:lang="tr-TR"><span class="char-style-override-3" xml:lang="en-US">abaküs </span><span class="char-style-override-4" xml:lang="en-US">abacus</span></p>
# <p class="Normal para-style-override-2" xml:lang="tr-TR"><span class="char-style-override-3" xml:lang="en-US">abiyotik </span><span class="char-style-override-4" xml:lang="en-US">abiotic</span></p>

# Text nodes of the two span classes; in the sample above the first class
# carries the headword and the second its translation.
kelime1 = t.xpath('//span[@class="char-style-override-3"]/text()')
kelime2 = t.xpath('//span[@class="char-style-override-4"]/text()')

# Print one numbered, semicolon-separated line per pair, counting from 1.
# The two lists are paired purely by position and zip() stops at the shorter
# one, so an entry missing either span shifts or drops later pairs.
ii = 0  # stays 0 when no pairs are found
for ii, (x, y) in enumerate(zip(kelime1, kelime2), start=1):
    print(ii, ";{};{}".format(x, y))
# Comments
# Leave a Comment