
Oscar Iyañez
PreguntaHola. Estoy siguiendo todas las lineas de código y me crea la carpeta con la fecha actual correctamete. Solo me falta el creado de los archivos txt de cada nota.
Me manda este error
Failed to parse: Las pruebas de coronavirus en Estados Unidos tienen problemas “alucinantes”, aseguró Bill Gates
¿Alguén sabe porque ?
Este es mi codigo
import requests import lxml.html as html import os import datetime HOME_URL = 'https://www.larepublica.co/' XPATH_LINK_TO_ARTICLE = '//h2[not(@class)]/a/text()' XPATH_TITLE = '//h1[@class="economiaSect"]/i/text()' XPATH_SUMMARY = '//div[@class="lead"]/p/text()' XPATH_BODY = '//div[@class="html-content"]/p[not(@class)]/text()' def parse_notice(link, today): try: response = requests.get(link) if response.status_code == 200: notice = response.content.decode('utf-8') parsed = html.fromstring(notice) try: title = parsed.xpath(XPATH_TITLE) title = title.replace('\"','') summary = parsed.xpath(XPATH_SUMMARY) body = parsed.xpath(XPATH_BODY) except IndexError: return with open(f'{today}/{title}.txt', 'w', encoding='utf-8') as f: #with open('{}/{}.txt'.format(today,title), 'w', encoding='utf-8') as f: f.write(title) f.write('\n\n') f.write(summary) f.write('\n\n') for p in body: f.write(p) f.write('\n') else: raise ValueError(f'Error: {response.status_code}') except ValueError as ve: print(ve) def parse_home(): try: response = requests.get(HOME_URL) if response.status_code == 200: home = response.content.decode('utf-8') parsed = html.fromstring(home) links_to_notices = parsed.xpath(XPATH_LINK_TO_ARTICLE) # print(links_to_notices) today = datetime.date.today().strftime('%d-%m-%Y') if not os.path.isdir(today): os.mkdir(today) for link in links_to_notices: parse_notice(link, today) else: raise ValueError(f'Error: {response.status_code}' ) except ValueError as ve: print(ve) def run(): parse_home() if __name__ == '__main__': run()