Aún no tienes acceso a esta clase

Crea una cuenta y continúa viendo este curso

Curso de Scrapy

Curso de Scrapy

Facundo García Martoni

Facundo García Martoni

Configuraciones útiles

16/27
Recursos

Aportes 67

Preguntas 2

Ordenar por:

¿Quieres ver más aportes, preguntas y respuestas de la comunidad? Crea una cuenta o inicia sesión.

Hola, comparto mi solución al reto utilizando un ciclo for:

def parse_only_quotes(self, response, **kwargs):
        """Accumulate quotes and authors across pages and emit them at the end.

        Args:
            response: Page response; queried with XPath for the quotes, the
                authors and the "next" pager link.
            **kwargs: Optional ``quotes`` and ``authors`` lists carried over
                from earlier pages through ``cb_kwargs``. A missing key
                starts as an empty list instead of leaving the name unbound.

        Yields:
            A follow-up request for the next page while a "next" link exists;
            otherwise a single item whose ``quotes`` list alternates quote
            text and author name.
        """
        quotes = kwargs.get('quotes', [])
        authors = kwargs.get('authors', [])

        quotes.extend(response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall())
        authors.extend(response.xpath('//small[@class="author"]/text()').getall())

        next_button = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()

        if next_button:
            yield response.follow(
                next_button,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )
        else:
            # Flatten the (quote, author) pairs into quote, author, quote, ...
            # zip() stops at the shorter list, so a length mismatch cannot
            # raise IndexError.
            quotes_author = [field for pair in zip(quotes, authors) for field in pair]

            yield {
                'quotes': quotes_author
            }
           
    def parse(self, response):
        """Scrape the first page and start the quote-collecting chain.

        Yields an item with the page title and the tag list, cut to the first
        ``top`` entries when the spider has a ``top`` attribute (for example
        from ``-a top=3``). If the page has a "next" link, it then yields a
        request for that page which carries this page's quotes and authors
        to ``parse_only_quotes`` through ``cb_kwargs``.
        """
        title = response.xpath('//h1/a/text()').get()
        quotes = response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall()
        authors = response.xpath('//small[@class="author"]/text()').getall()
        tags = response.xpath('//div[contains(@class,"tags-box")]//span[@class="tag-item"]/a/text()').getall()

        # Spider arguments arrive as strings, hence the int() conversion.
        top = getattr(self, 'top', None)
        if top:
            tags = tags[:int(top)]

        yield {
            'title': title,
            'tags': tags
        }

        next_button = response.xpath('//ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if next_button:
            yield response.follow(
                next_button,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )



- Sin embargo, después de investigar, vi que con la función zip() la solución se reduce a unas pocas líneas:



else:
    quotes_author = list(zip(quotes, authors))
    yield {
        'quotes': quotes_author
    }
```

He aquí mi resultado:

[
  {
    "title": "Quotes to Scrape",
    "top_tags": [
      "love",
      "inspirational",
      "life",
      "humor",
      "books",
      "reading",
      "friendship"
    ],
    "quotes": [
      {
        "quote": "“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”",
        "author": "Albert Einstein"
      },
      {
        "quote": "“It is our choices, Harry, that show what we truly are, far more than our abilities.”",
        "author": "J.K. Rowling"
      },
	...
	...
    ]
  }
]

Ejecutar con:

$ rm -f quotes.json && scrapy crawl quotes -a top=7

Aquí el código fuente:

import scrapy

class QuotesSpider(scrapy.Spider):
    """Spider that gathers every quote of the site into a single item.

    The first page supplies the title and the top tags. The quotes of each
    following page are appended to the same ``quotes`` list, which travels
    from request to request through ``cb_kwargs``. Run with
    ``scrapy crawl quotes -a top=N`` to keep only the first N tags.
    """

    name = 'quotes'
    start_urls = [
        'http://quotes.toscrape.com'
    ]
    custom_settings = {
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        # The key must be spelled exactly like the Scrapy setting name.
        'CONCURRENT_REQUESTS': 24,
        'MEMUSAGE_LIMIT_MB': 2048,
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],
        'ROBOTSTXT_OBEY': True,
        'USER_AGENT': 'PepitoPerez',
        'FEED_EXPORT_ENCODING': 'utf-8'
    }

    def parse_only_quotes(self, response, **kwargs):
        """Append this page's quotes to the carried item and keep crawling.

        Args:
            response: Response of a page after the first one.
            **kwargs: The item under construction (``title``, ``top_tags``,
                ``quotes``) as passed through ``cb_kwargs``.

        Yields:
            A request for the next page while one exists, otherwise the
            completed item. Nothing is yielded when no kwargs were passed.
        """
        if not kwargs:
            return

        kwargs["quotes"].extend(self.get_all_author_quotes(response))

        next_page = self.get_next_link(response)
        if next_page:
            yield response.follow(
                next_page,
                callback=self.parse_only_quotes,
                cb_kwargs=kwargs
            )
        else:
            yield kwargs

    def parse(self, response):
        """Build the item from the first page and follow the pager.

        Yields:
            A request for the next page carrying the item in ``cb_kwargs``,
            or the item itself when the first page has no "next" link, so
            that a single-page site still produces output.
        """
        item = {
            "title": self.get_title(response),
            "top_tags": self.get_top_tags(response),
            "quotes": self.get_all_author_quotes(response),
        }

        next_page = self.get_next_link(response)
        if next_page:
            yield response.follow(
                next_page,
                callback=self.parse_only_quotes,
                cb_kwargs=item
            )
        else:
            yield item

    # Extraction helpers shared by both callbacks.
    def get_title(self, response):
        """Return the text of the page's main heading link."""
        return response.xpath('//h1/a/text()').get()

    def get_top_tags(self, response):
        """Return the tag names of the tags box.

        When the spider has a ``top`` attribute (``-a top=3``), only that
        many leading tags are returned.
        """
        top_tags = response.xpath('//div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()').getall()
        top = getattr(self, 'top', None)
        if top:
            top_tags = top_tags[:int(top)]
        return top_tags

    def get_all_quotes(self, response):
        """Return the text of every quote on the page."""
        return response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall()

    def get_all_author(self, response):
        """Return the author name of every quote on the page."""
        return response.xpath('//span/small[@class="author" and @itemprop="author"]/text()').getall()

    def get_all_author_quotes(self, response):
        """Return one ``{'quote': ..., 'author': ...}`` dict per quote."""
        quotes = self.get_all_quotes(response)
        authors = self.get_all_author(response)
        return [
            {'quote': quote, 'author': author}
            for quote, author in zip(quotes, authors)
        ]

    def get_next_link(self, response):
        """Return the href of the pager's "next" link, or None on the last page."""
        return response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
import scrapy

class QuotesSpider(scrapy.Spider):
    """Spider that exports the site title, top tags and all quotes.

    ``parse`` emits the title and the top tags from the first page.
    ``parse_only_quotes`` then follows the pager, accumulating quotes and
    authors, and emits them as (quote, author) pairs after the last page.
    """

    name = 'quotes'
    start_urls = [
        'https://quotes.toscrape.com/'
    ]

    custom_settings = {
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        'CONCURRENT_REQUESTS': 24,
        'MEMUSAGE_LIMIT_MB': 2048,
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],
        'ROBOTSTXT_OBEY': True,
        'USER_AGENT': 'Rodrigo',
        'FEED_EXPORT_ENCODING': 'utf8'
    }

    # XPath expressions shared by both callbacks.
    title = '//h1/a/text()'
    quotes = '//span[@class="text" and @itemprop="text"]/text()'
    authors = '//small[@class="author" and @itemprop="author"]/text()'
    top_tags = '//div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()'
    next_page_btn = '//ul[@class="pager"]//li[@class="next"]/a/@href'

    def parse_only_quotes(self, response, **kwargs):
        """Add this page's quotes and authors to the carried lists.

        Args:
            response: Response of a page after the first one.
            **kwargs: Optional ``quotes`` and ``authors`` lists passed
                through ``cb_kwargs``. A missing key starts as an empty list
                instead of leaving the local name unbound.

        Yields:
            A request for the next page while one exists, otherwise one item
            whose ``quotes`` value is a list of (quote, author) pairs.
        """
        quotes = kwargs.get('quotes', [])
        authors = kwargs.get('authors', [])

        quotes.extend(response.xpath(self.quotes).getall())
        authors.extend(response.xpath(self.authors).getall())

        next_page_btn = response.xpath(self.next_page_btn).get()
        if next_page_btn:
            yield response.follow(
                next_page_btn,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )
        else:
            yield {
                'quotes': list(zip(quotes, authors))
            }

    def parse(self, response):
        """Emit the title and top tags, then start collecting quotes.

        The tag list is cut to the first ``top`` entries when the spider has
        a ``top`` attribute (for example from ``-a top=3``).
        """
        title = response.xpath(self.title).get()
        quotes = response.xpath(self.quotes).getall()
        authors = response.xpath(self.authors).getall()
        top_tags = response.xpath(self.top_tags).getall()

        top = getattr(self, 'top', None)
        if top:
            top_tags = top_tags[:int(top)]

        yield {
            'title': title,
            'top_tags': top_tags
        }

        next_page_btn = response.xpath(self.next_page_btn).get()
        if next_page_btn:
            yield response.follow(
                next_page_btn,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )


import scrapy


class QuotesSpider(scrapy.Spider):
    """Spider built from small helpers, one per extraction or decision.

    ``parse`` emits the top tags and then either a request for the next
    page or the quotes item. ``parse_only_quotes`` repeats that decision on
    every later page with the accumulated quotes.
    """

    name = 'quotes'
    start_urls = ['http://quotes.toscrape.com/']
    custom_settings = {
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        'CONCURRENT_REQUESTS': 24,
        'FEED_EXPORT_ENCODING': 'utf-8',
    }

    def parse(self, response):
        """Yield the top-tags item, then the follow-up request or quotes item."""
        yield self._top_tags(response)
        first_page_quotes = self._quotes_and_authors(response)
        yield self._follow_if_nextpage(response, first_page_quotes)

    def _top_tags(self, response):
        """Return the ``top_tags`` item, limited by ``_top_tags_max()``."""
        all_tags = self._xpath_tags(response)
        limit = self._top_tags_max()
        return {'top_tags': all_tags[:limit]}

    def _top_tags_max(self):
        """Return the ``top`` spider attribute as an int (10 when unset)."""
        requested = getattr(self, 'top', 10)
        return int(requested)

    def _xpath_tags(self, response):
        """Return the link texts found inside the tags box."""
        selection = response.xpath('//div[contains(@class, "tags-box")]//span//a/text()')
        return selection.getall()

    def _xpath_quotes(self, response):
        """Return the quote texts of the page."""
        selection = response.xpath('//span[@class="text" and @itemprop="text"]/text()')
        return selection.getall()

    def _xpath_authors(self, response):
        """Return the author names of the page."""
        selection = response.xpath('//small[@class="author"]/text()')
        return selection.getall()

    def _xpath_next_page(self, response):
        """Return the href of the pager's "next" link, or None."""
        selection = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href')
        return selection.get()

    def _quotes_and_authors(self, response):
        """Return the page's quotes paired with their authors."""
        return self._quote(
            self._xpath_quotes(response),
            self._xpath_authors(response),
        )

    def _follow_if_nextpage(self, response, quotes):
        """Return a request for the next page, or the quotes item if none."""
        next_page = self._xpath_next_page(response)
        if not next_page:
            return self._quotes(quotes)
        return self._follow(response, next_page, cb_kwargs=self._quotes(quotes))

    def _follow(self, response, next_page, **kwargs):
        """Build the follow-up request handled by ``parse_only_quotes``."""
        callback = self.parse_only_quotes
        return response.follow(next_page, callback=callback, **kwargs)

    def _quotes(self, quotes):
        """Wrap the quotes list in the item / cb_kwargs dict."""
        return dict(quotes=quotes)

    def _quote(self, quotes, authors):
        """Return one ``{'quote', 'author'}`` dict per zipped pair."""
        paired = []
        for quote, author in zip(quotes, authors):
            paired.append({'quote': quote, 'author': author})
        return paired

    def parse_only_quotes(self, response, **kwargs):
        """Extend the carried quotes with this page's and decide what is next."""
        accumulated = kwargs.get('quotes', [])
        page_quotes = self._quotes_and_authors(response)
        accumulated.extend(page_quotes)
        yield self._follow_if_nextpage(response, accumulated)

¿Qué querés que te diga? Ya no entiendo nada…

Reto y todo el código de la clase:

from typing import List, Dict, Any
import scrapy

## Web titles = //h1/a/text()
## Quotes = //span[@class="text" and @itemprop="text"]/text()
## Top ten tags = //div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()
## Next button = //ul[@class="pager"]//li[@class="next"]/a/@href
## Authors = //small[@class="author" and @itemprop="author"]/text()

class QuotesSpider(scrapy.Spider):
    """Spider for quotes.toscrape.com that exports to ``quotes.json``.

    ``parse`` emits the site title and the top tags from the first page.
    ``parse_only_quotes`` then walks the pager, accumulating quotes and
    authors, and emits both lists once the last page is reached.
    """

    name: str = 'quotes'
    start_urls: List[str] = [
        'https://quotes.toscrape.com/page/1/'
    ]
    ## Info about settings:
    ## https://docs.scrapy.org/en/latest/topics/feed-exports.html?highlight=FEED_URI#feeds
    custom_settings: Dict[str, Dict[str, Any]] = {
        'FEEDS': {
            'quotes.json': {
                'format': 'json',
                'encoding': 'utf8',
                'store_empty': False,
                'fields': None,
                'indent': 4,
                'item_export_kwargs': {
                    'export_empty_fields': True,
                },
            },
        },
    }

    def parse_only_quotes(self,
                          response: scrapy.http.response.html.HtmlResponse,
                          **kwargs):
        """Add this page's quotes and authors to the carried lists.

        Args:
            response: Response of a page after the first one.
            **kwargs: Optional ``quotes`` and ``authors`` lists passed
                through ``cb_kwargs``. A missing key starts as an empty list
                instead of leaving the local name unbound.

        Yields:
            A request for the next page while one exists, otherwise one item
            with the complete ``quotes`` and ``authors`` lists.
        """
        quotes: List[str] = kwargs.get('quotes', [])
        authors: List[str] = kwargs.get('authors', [])

        ## Add this page's quotes and authors to the lists
        quotes.extend(
            response.xpath(
                '//span[@class="text" and @itemprop="text"]/text()'
            ).getall()
        )
        authors.extend(
            response.xpath(
                '//small[@class="author" and @itemprop="author"]/text()'
            ).getall()
        )

        ## Link of the "next" button; None on the last page
        next_page_button_link = response.xpath(
            '//ul[@class="pager"]//li[@class="next"]/a/@href'
        ).get()

        if next_page_button_link:
            ## Schedule this same callback for the next page
            yield response.follow(next_page_button_link,
                                  callback=self.parse_only_quotes,
                                  cb_kwargs={'quotes': quotes, 'authors': authors}
                                  )
        else:
            ## No more pages: emit everything collected
            yield {
                'quotes': quotes,
                'authors': authors
            }

    def parse(self, response: scrapy.http.response.html.HtmlResponse):
        """Emit the title and top tags, then start collecting quotes.

        The tag list is cut to the first ``top`` entries when the spider has
        a ``top`` attribute, e.g. ``scrapy crawl quotes -a top=5``.
        """
        ## Title of "Quotes to Scrape"
        title = response.xpath('//h1/a/text()').get()

        ## Authors list
        authors: List[str] = response.xpath(
            '//small[@class="author" and @itemprop="author"]/text()'
        ).getall()

        ## Quotes list
        quotes: List[str] = response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()'
        ).getall()

        ## Most popular tags
        top_tags: List[str] = response.xpath(
            '//div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()'
        ).getall()

        ## Spider arguments arrive as strings, hence the int() conversion
        top = getattr(self, 'top', None)
        if top is not None:
            top_tags = top_tags[:int(top)]

        ## Emit only the title and the top tags
        yield {
            'title': title,
            'top_tags': top_tags
        }

        ## Link of the "next" button; None when there is a single page
        next_page_button_link = response.xpath(
            '//ul[@class="pager"]//li[@class="next"]/a/@href'
        ).get()

        if next_page_button_link:
            ## Hand this page's quotes and authors to the quotes-only callback
            yield response.follow(next_page_button_link,
                                  callback=self.parse_only_quotes,
                                  cb_kwargs={'quotes': quotes, 'authors': authors}
                                  )

Así me quedó el código:

def parse_only_quotes(self, response, **kwargs):
        """Collect quotes and authors page by page and emit them joined.

        Args:
            response: Page response; queried with XPath for the quotes, the
                authors and the "next" link.
            **kwargs: Optional ``quotes`` and ``authors`` lists carried over
                through ``cb_kwargs``. A missing key starts as an empty list
                instead of leaving the local name unbound.

        Yields:
            A request for the next page while a "next" link exists; otherwise
            one item whose ``quotes`` list holds ``"<quote> - <author>"``
            strings.
        """
        quotes = kwargs.get('quotes', [])
        authors = kwargs.get('authors', [])

        quotes.extend(response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall())
        authors.extend(response.xpath('//div[@class="col-md-8"]//small[@class="author"]/text()').getall())

        next_btn = response.xpath('//li[@class="next"]/a/@href').get()
        if next_btn:
            yield response.follow(
                next_btn,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )
        else:
            yield {
                'quotes': [quote + ' - ' + author for quote, author in zip(quotes, authors)]
            }

Lo hice con list comprehensions a partir de dos listas: una para los autores y otra para las citas.
La función zip() sirve para recorrer varios iterables en paralelo dentro de un mismo bucle: en cada iteración asigna el elemento «actual» de cada iterable a la variable que le corresponde; en este caso, quote recibe los elementos de quotes y author los de authors.

[
{"title": "Quotes to Scrape", "top_tags": ["love", "inspirational", "life", "humor", "books", "reading", "friendship", "friends", "truth", "simile"]},
{"AllQuotes": {
	"Albert Einstein": ["“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”", "“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”", "“Try not to become a man of success. Rather become a man of value.”", "“If you can't explain it to a six year old, you don't understand it yourself.”", "“If you want your children to be intelligent, read them fairy tales. If you want them to be more intelligent, read them more fairy tales.”", "“Logic will get you from A to Z; imagination will get you everywhere.”", "“Any fool can know. The point is to understand.”", "“Life is like riding a bicycle. To keep your balance, you must keep moving.”", "“If I were not a physicist, I would probably be a musician. I often think in music. I live my daydreams in music. I see my life in terms of music.”", "“Anyone who has never made a mistake has never tried anything new.”"], 
	"J.K. Rowling": ["“It is our choices, Harry, that show what we truly are, far more than our abilities.”", "“It takes a great deal of bravery to stand up to our enemies, but just as much to stand up to our friends.”", "“It is impossible to live without failing at something, unless you live so cautiously that you might as well not have lived at all - in which case, you fail by default.”", "“Of course it is happening inside your head, Harry, but why on earth should that mean that it is not real?”", "“To the well-organized mind, death is but the next great adventure.”", "“It matters not what someone is born, but what they grow to be.”", "“Do not pity the dead, Harry. Pity the living, and, above all those who live without love.”", "“Remember, if the time should come when you have to make a choice between what is right and what is easy, remember what happened to a boy who was good, and kind, and brave, because he strayed across the path of Lord Voldemort. Remember Cedric Diggory.”", "“The truth.\" Dumbledore sighed. \"It is a beautiful and terrible thing, and should therefore be treated with great caution.”"], 
	"Jane Austen": ["“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”", "“There is nothing I would not do for those who are really my friends. I have no notion of loving people by halves, it is not my nature.”", "“A lady's imagination is very rapid; it jumps from admiration to love, from love to matrimony in a moment.”", "“I declare after all there is no enjoyment like reading! How much sooner one tires of any thing than of a book! -- When I have a house of my own, I shall be miserable if I have not an excellent library.”", "“There are few people whom I really love, and still fewer of whom I think well. The more I see of the world, the more am I dissatisfied with it; and every day confirms my belief of the inconsistency of all human characters, and of the little dependence that can be placed on the appearance of merit or sense.”"], 
	"Marilyn Monroe": ["“Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.”", "“This life is what you make it. No matter what, you're going to mess up sometimes, it's a universal truth. But the good part is you get to decide how you're going to mess it up. Girls will be your friends - they'll act like it anyway. But just remember, some come, some go. The ones that stay with you through everything - they're your true best friends. Don't let go of them. Also remember, sisters make the best friends in the world. As for lovers, well, they'll come and go too. And baby, I hate to say it, most of them - actually pretty much all of them are going to break your heart, but you can't give up because if you give up, you'll never find your soulmate. You'll never find that half who makes you whole and that goes for everything. Just because you fail once, doesn't mean you're gonna fail at everything. Keep trying, hold on, and always, always, always believe in yourself, because if you don't, then who will, sweetie? So keep your head high, keep your chin up, and most importantly, keep smiling, because life's a beautiful thing and there's so much to smile about.”", "“You believe lies so you eventually learn to trust no one but yourself.”", "“If you can make a woman laugh, you can make her do anything.”", "“The real lover is the man who can thrill you by kissing your forehead or smiling into your eyes or just staring into space.”", "“A wise girl kisses but doesn't love, listens but doesn't believe, and leaves before she is left.”", "“I am good, but not an angel. I do sin, but I am not the devil. I am just a small girl in a big world trying to find someone to love.”"], 
	"André Gide": ["“It is better to be hated for what you are than to be loved for what you are not.”"], 
	"Thomas A. Edison": ["“I have not failed. I've just found 10,000 ways that won't work.”"], 
	"Eleanor Roosevelt": ["“A woman is like a tea bag; you never know how strong it is until it's in hot water.”", "“Do one thing every day that scares you.”"], 
	"Steve Martin": ["“A day without sunshine is like, you know, night.”"], 
	"Bob Marley": ["“You may not be her first, her last, or her only. She loved before she may love again. But if she loves you now, what else matters? She's not perfect—you aren't either, and the two of you may never be perfect together but if she can make you laugh, cause you to think twice, and admit to being human and making mistakes, hold onto her and give her the most you can. She may not be thinking about you every second of the day, but she will give you a part of her that she knows you can break—her heart. So don't hurt her, don't change her, don't analyze and don't expect more than she can give. Smile when she makes you happy, let her know when she makes you mad, and miss her when she's not there.”", "“One good thing about music, when it hits you, you feel no pain.”", "“The truth is, everyone is going to hurt you. You just got to find the ones worth suffering for.”"], 
	"Dr. Seuss": ["“I like nonsense, it wakes up the brain cells. Fantasy is a necessary ingredient in living.”", "“Today you are You, that is truer than true. There is no one alive who is Youer than You.”", "“The more that you read, the more things you will know. The more that you learn, the more places you'll go.”", "“I have heard there are troubles of more than one kind. Some come from ahead and some come from behind. But I've bought a big bat. I'm all ready you see. Now my troubles are going to have troubles with me!”", "“Think left and think right and think low and think high. Oh, the thinks you can think up if only you try!”", "“A person's a person, no matter how small.”"], 
	"Douglas Adams": ["“I may not have gone where I intended to go, but I think I have ended up where I needed to be.”"], 
	"Elie Wiesel": ["“The opposite of love is not hate, it's indifference. The opposite of art is not ugliness, it's indifference. The opposite of faith is not heresy, it's indifference. And the opposite of life is not death, it's indifference.”"], 
	"Friedrich Nietzsche": ["“It is not a lack of love, but a lack of friendship that makes unhappy marriages.”"], 
	"Mark Twain": ["“Good friends, good books, and a sleepy conscience: this is the ideal life.”", "“I have never let my schooling interfere with my education.”", "“′Classic′ - a book which people praise and don't read.”", "“The fear of death follows from the fear of life. A man who lives fully is prepared to die at any time.”", "“A lie can travel half way around the world while the truth is putting on its shoes.”", "“Never tell the truth to people who are not worthy of it.”"], 
	"Allen Saunders": ["“Life is what happens to us while we are making other plans.”"], 
	"Pablo Neruda": ["“I love you without knowing how, or when, or from where. I love you simply, without problems or pride: I love you in this way because I do not know any other way of loving but this, in which there is no I or you, so intimate that your hand upon my chest is my hand, so intimate that when I fall asleep your eyes close.”"], 
	"Ralph Waldo Emerson": ["“For every minute you are angry you lose sixty seconds of happiness.”", "“Finish each day and be done with it. You have done what you could. Some blunders and absurdities no doubt crept in; forget them as soon as you can. Tomorrow is a new day. You shall begin it serenely and with too high a spirit to be encumbered with your old nonsense.”"], 
	"Mother Teresa": ["“If you judge people, you have no time to love them.”", "“Not all of us can do great things. But we can do small things with great love.”"], 
	"Garrison Keillor": ["“Anyone who thinks sitting in church can make you a Christian must also think that sitting in a garage can make you a car.”"], 
	"Jim Henson": ["“Beauty is in the eye of the beholder and it may be necessary from time to time to give a stupid or misinformed beholder a black eye.”"], 
	"Charles M. Schulz": ["“All you need is love. But a little chocolate now and then doesn't hurt.”"], 
	"William Nicholson": ["“We read to know we're not alone.”"], 
	"Jorge Luis Borges": ["“I have always imagined that Paradise will be a kind of library.”"], 
	"George Eliot": ["“It is never too late to be what you might have been.”"], 
	"George R.R. Martin": ["“A reader lives a thousand lives before he dies, said Jojen. The man who never reads lives only one.”", "“... a mind needs books as a sword needs a whetstone, if it is to keep its edge.”"], 
	"C.S. Lewis": ["“You can never get a cup of tea large enough or a book long enough to suit me.”", "“To love at all is to be vulnerable. Love anything and your heart will be wrung and possibly broken. If you want to make sure of keeping it intact you must give it to no one, not even an animal. Wrap it carefully round with hobbies and little luxuries; avoid all entanglements. Lock it up safe in the casket or coffin of your selfishness. But in that casket, safe, dark, motionless, airless, it will change. It will not be broken; it will become unbreakable, impenetrable, irredeemable. To love is to be vulnerable.”", "“Some day you will be old enough to start reading fairy tales again.”", "“We are not necessarily doubting that God will do the best for us; we are wondering how painful the best will turn out to be.”", "“I believe in Christianity as I believe that the sun has risen: not only because I see it, but because by it I see everything else.”"], 
	"Martin Luther King Jr.": ["“Only in the darkness can you see the stars.”"], 
	"James Baldwin": ["“Love does not begin and end the way we seem to think it does. Love is a battle, love is a war; love is a growing up.”"], 
	"Haruki Murakami": ["“If you only read the books that everyone else is reading, you can only think what everyone else is thinking.”"], 
	"Alexandre Dumas fils": ["“The difference between genius and stupidity is: genius has its limits.”"], 
	"Stephenie Meyer": ["“He's like a drug for you, Bella.”"], 
	"Ernest Hemingway": ["“There is no friend as loyal as a book.”", "“There is nothing to writing. All you do is sit down at a typewriter and bleed.”"], 
	"Helen Keller": ["“When one door of happiness closes, another opens; but often we look so long at the closed door that we do not see the one which has been opened for us.”"], 
	"George Bernard Shaw": ["“Life isn't about finding yourself. Life is about creating yourself.”"], 
	"Charles Bukowski": ["“That's the problem with drinking, I thought, as I poured myself a drink. If something bad happens you drink in an attempt to forget; if something good happens you drink in order to celebrate; and if nothing happens you drink to make something happen.”", "“Some people never go crazy. What truly horrible lives they must lead.”"], 
	"Suzanne Collins": ["“You don’t forget the face of the person who was your last hope.”", "“Remember, we're madly in love, so it's all right to kiss me anytime you feel like it.”"], 
	"J.R.R. Tolkien": ["“Not all those who wander are lost.”"], 
	"Alfred Tennyson": ["“If I had a flower for every time I thought of you...I could walk through my garden forever.”"], 
	"Terry Pratchett": ["“The trouble with having an open mind, of course, is that people will insist on coming along and trying to put things in it.”"], 
	"J.D. Salinger": ["“What really knocks me out is a book that, when you're all done reading it, you wish the author that wrote it was a terrific friend of yours and you could call him up on the phone whenever you felt like it. That doesn't happen much, though.”"], 
	"George Carlin": ["“The reason I talk to myself is because I’m the only one whose answers I accept.”"], 
	"John Lennon": ["“You may say I'm a dreamer, but I'm not the only one. I hope someday you'll join us. And the world will live as one.”"], 
	"W.C. Fields": ["“I am free of all prejudice. I hate everyone equally. ”"], 
	"Ayn Rand": ["“The question isn't who is going to let me; it's who is going to stop me.”"], 
	"Jimi Hendrix": ["“I'm the one that's got to die when it's time for me to die, so let me live my life the way I want to.”"], 
	"J.M. Barrie": ["“To die will be an awfully big adventure.”"], 
	"E.E. Cummings": ["“It takes courage to grow up and become who you really are.”"], 
	"Khaled Hosseini": ["“But better to get hurt by the truth than comforted with a lie.”"], 
	"Harper Lee": ["“You never really understand a person until you consider things from his point of view... Until you climb inside of his skin and walk around in it.”"], 
	"Madeleine L'Engle": ["“You have to write the book that wants to be written. And if the book will be too difficult for grown-ups, then you write it for children.”"]}}
]

Hola a todos, este es mi código. Con unos pequeños addons se puede usar un segundo argumento para que solo guarde cierto número de las primeras citas.

import scrapy


class QuotesScrapy(scrapy.Spider):
    """Spider that exports the title, categories and quotes of the site.

    Spider arguments (``-a name=value``):
        top: keep only the first ``top`` categories.
        max_num_quotes: keep only the first ``max_num_quotes`` quotes.
    """

    name = 'quotes'
    start_urls = ['https://quotes.toscrape.com/page/1/']
    # Keys must be spelled exactly like the Scrapy setting names.
    custom_settings = {'FEED_URI': 'quotes.json',  # Name of the output file
                       'FEED_FORMAT': 'json',  # Format of the output file
                       'CONCURRENT_REQUESTS': 30,  # Number of simultaneous requests
                       'MEMUSAGE_LIMIT_MB': 2048,  # RAM limit for the scraping process
                       # Email notified when the memory limit is exceeded
                       'MEMUSAGE_NOTIFY_MAIL': '[email protected]',
                       'ROBOTSTXT_OBEY': True,  # Obey the site's robots.txt
                       'USER_AGENT': 'usuario_ramdom',  # User agent sent by the spider
                       'FEED_EXPORT_ENCODING': 'utf-8'  # Encoding of the exported feed

                       }

    def parse(self, response):
        """Emit the title and categories, then start collecting quotes."""
        title = response.xpath('//h1/a/text()').get()
        quotes = response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()').getall()
        authors = response.xpath('//small[@class="author"]/text()').getall()
        categories = response.xpath(
            '//div[contains(@class,"tags-box")]//span[@class="tag-item"]/a/text()').getall()

        # Spider arguments arrive as strings, hence the int() conversion.
        top = getattr(self, 'top', None)
        if top:
            categories = categories[:int(top)]

        yield {'title': title,
               'categories': categories}

        next_page_button_link = response.xpath(
            '//ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if next_page_button_link:
            # response.follow accepts relative links (without the host).
            yield response.follow(
                next_page_button_link,
                callback=self.parse_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )

    def parse_quotes(self, response, **kwargs):
        """Accumulate quotes and authors; emit them after the last page.

        Args:
            response: Response of a page after the first one.
            **kwargs: Optional ``quotes`` and ``authors`` lists passed
                through ``cb_kwargs``. A missing key starts as an empty list
                instead of leaving the local name unbound.

        Yields:
            A request for the next page while one exists, otherwise one item
            whose ``quotes`` value is a list of ``{'quote', 'author'}`` dicts,
            sliced to ``max_num_quotes`` when that argument is given.
        """
        quotes = kwargs.get('quotes', [])
        authors = kwargs.get('authors', [])

        quotes.extend(response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()').getall())
        authors.extend(response.xpath(
            '//small[@class="author"]/text()').getall())

        next_page_button_link = response.xpath(
            '//ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )
        else:
            reord = [{'quote': quote, 'author': author}
                     for quote, author in zip(quotes, authors)]

            max_num_quotes = getattr(self, 'max_num_quotes', None)
            if max_num_quotes:
                # A slice bound past the end is harmless, so no length check.
                reord = reord[:int(max_num_quotes)]

            yield {'quotes': reord}

Me gusta bastante: hay que ver a quotes (y a authors, en el código del desafío de abajo) como una recursividad en la que se va completando cada lista (en mi caso, quotes y authors) para luego unirlas con zip(). Gracias a la comunidad y sus aportes.

import scrapy

# Title = //h1/a/text()
#quotes = //span/[@class="text" and itemprop="text"]/text()
#authors = //small[@class="author"]/text()
#top_tags= //div[contains(@class,"tags-box")]//span[@class="tag-item"]/a/text()'
#next_page_button = //ul[@class="pager"]//li[@class="next"]/a/@href

class QuotesSpider(scrapy.Spider):
  """Spider that exports the site title, its top tags and every quote/author pair."""

  name = "quotes"
  start_urls = [
    "AQUI VA URL DEL SITIO DE QUOTES TO SCRAPE"
  ]
  custom_settings = {
    'FEEDS': {
      'quotes.json': {
        'format': 'json',
        'encoding': 'utf8',
        'indent': 4,
      }
    },
    'CONCURRENT_REQUESTS': 24,
    'MEMUSAGE_LIMIT_MB': 2048,  # RAM limit
    'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],  # notified if the spider exceeds the memory limit
    'ROBOTSTXT_OBEY': True,
    'USER_AGENT': "WADE"
  }

  def parse_only_quotes(self, response, **kwargs):
    """Extend the running quote/author lists and emit them on the last page.

    Args:
      response: Page response exposing ``xpath`` and ``follow``.
      **kwargs: May carry the ``quotes`` and ``authors`` lists gathered so
        far; a missing entry starts as an empty list.

    Yields:
      A follow-up request while a "next" link exists, otherwise one item
      ``{"complete_quote": [(quote, author), ...]}``.
    """
    # Default to empty lists so the callback also works without cb_kwargs.
    quotes = kwargs.get('quotes', [])
    authors = kwargs.get('authors', [])

    quotes.extend(response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall())
    authors.extend(response.xpath('//small[@class="author"]/text()').getall())

    next_page_button_link = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
    if next_page_button_link:
      yield response.follow(
        next_page_button_link,
        callback=self.parse_only_quotes,
        cb_kwargs={'quotes': quotes, 'authors': authors},
      )
    else:
      yield {
        "complete_quote": list(zip(quotes, authors))
      }

  def parse(self, response):
    """Emit the title and top tags, then start collecting quotes page by page.

    The spider attribute ``top``, when set, limits how many tags are kept.
    """
    title = response.xpath('//h1/a/text()').get()
    quotes = response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall()
    authors = response.xpath('//small[@class="author"]/text()').getall()
    top_tags = response.xpath('//div[contains(@class,"tags-box")]//span[@class="tag-item"]/a/text()').getall()

    top = getattr(self, "top", None)
    if top:
      top_tags = top_tags[:int(top)]

    yield {
      "title": title,
      "top_ten_tags": top_tags
    }

    next_page_button_link = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
    if next_page_button_link:
      # Hand the first page's quotes and authors to the next callback.
      yield response.follow(
        next_page_button_link,
        callback=self.parse_only_quotes,
        cb_kwargs={'quotes': quotes, 'authors': authors},
      )

    ```

EL reto:

import scrapy

# title = response.xpath('//h1/a[@href="/"]/text()').get()
# quotes = response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall()
# top ten tags = response.xpath('//div[@class="col-md-4 tags-box"]/span[@class="tag-item"]/a/text()').getall()

# follow_link =  response.xpath('//ul[@class="pager"]/li[@class="next"]/a/@href').get()

# authors = response.xpath('//span/small[@class="author" and @itemprop="author"]/text()').getall()


class QuotesSpider(scrapy.Spider):
    """Spider that exports the title, top tags, and all quotes and authors."""

    name = 'quotes'
    start_urls = [
        'http://quotes.toscrape.com/'
    ]
    custom_settings = {
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        'CONCURRENT_REQUESTS': 24,
        'MEMUSAGE_LIMIT_MB': 2048,
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],
        'ROBOTSTXT_OBEY': True,
        'USER_AGENT': 'PepitoPerez',
        'FEED_EXPORT_ENCODING': 'utf-8'
    }

    def parse_only_quotes_and_authors(self, response, **kwargs):
        """Extend the running lists with this page and emit them on the last page.

        Args:
            response: Page response exposing ``xpath`` and ``follow``.
            **kwargs: May carry the ``quotes`` and ``authors`` lists gathered
                so far; a missing entry starts as an empty list.

        Yields:
            A follow-up request while a "next" link exists, otherwise one item
            with the parallel ``quotes`` and ``authors`` lists.
        """
        # Default to empty lists so the callback also works without cb_kwargs.
        quotes = kwargs.get('quotes', [])
        authors = kwargs.get('authors', [])

        quotes.extend(response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()').getall())
        authors.extend(response.xpath(
            '//span/small[@class="author" and @itemprop="author"]/text()').getall())

        follow_link = response.xpath(
            '//ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if follow_link:
            yield response.follow(
                follow_link,
                callback=self.parse_only_quotes_and_authors,
                cb_kwargs={'quotes': quotes, 'authors': authors})
        else:
            yield {
                'quotes': quotes,
                'authors': authors
            }

    def parse(self, response):
        """Emit the title and top tags, then start collecting quotes and authors.

        The spider attribute ``top``, when set, limits how many tags are kept.
        """
        title = response.xpath('//h1/a[@href="/"]/text()').get()
        quotes = response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()').getall()
        authors = response.xpath(
            '//span/small[@class="author" and @itemprop="author"]/text()').getall()
        top_tag = response.xpath(
            '//div[@class="col-md-4 tags-box"]/span[@class="tag-item"]/a/text()').getall()

        top = getattr(self, 'top', None)
        if top:
            top_tag = top_tag[:int(top)]

        yield {
            'title': title,
            'Top  Tag': top_tag
        }

        follow_link = response.xpath(
            '//ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if follow_link:
            yield response.follow(
                follow_link,
                callback=self.parse_only_quotes_and_authors,
                cb_kwargs={'quotes': quotes, 'authors': authors})

Con algo de mejora al código para no repetir la extracción de la misma información en ambas funciones:

import scrapy

"""
Title = //h1/a/text()
Quotes = //span[@class="text" and @itemprop="text"]/text()
Top Ten Tags = //div[contains(@class, "tags-box")]/span[@class="tag-item"]/a/text()
Next = //ul[@class="pager"]//li[@class="next"]/a/@href
"""


class QuotesSpider(scrapy.Spider):
    """Spider that emits one item with the title, top tags and all quotes.

    The item is built on the first page and carried through ``cb_kwargs``
    until the last page, where it is finally yielded.
    """

    name = "quotes"
    start_urls = [
        "http://quotes.toscrape.com/page/1/"
    ]
    custom_settings = {
        "FEED_URI": "quotes.json",
        "FEED_FORMAT": "json",
        "CONCURRENT_REQUESTS": 24,
        "MEMUSAGE_LIMIT_MB": 2048,
        "MEMUSAGE_NOTIFY_MAIL": ["[email protected]"],
        "ROBOTSTXT_OBEY": True,
        # Was misspelled "USER_AGET", so the custom user agent never applied.
        "USER_AGENT": "User agent",
        "FEED_EXPORT_ENCODING": "utf-8"
    }

    def parse_only_quotes(self, response, **kwargs):
        """Append this page's quotes to the carried item and continue or finish.

        Args:
            response: Page response exposing ``xpath`` and ``follow``.
            **kwargs: The item built so far; its ``quotes`` list is created
                if missing.

        Yields:
            A follow-up request while a "next" link exists, otherwise the
            accumulated item.
        """
        kwargs.setdefault("quotes", []).extend(self.get_quotes(response))

        next_page_button_link = response.xpath(
            '//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs=kwargs
            )
        else:
            yield kwargs

    def parse(self, response):
        """Build the item from the first page and hand it to the next page.

        The spider attribute ``top``, when set, limits how many tags are kept.
        """
        title = response.xpath('//h1/a/text()').get()

        tags = response.xpath(
            '//div[contains(@class, "tags-box")]/span[@class="tag-item"]/a/text()').getall()

        top = getattr(self, "top", None)
        if top:
            tags = tags[:int(top)]

        item = {
            "title": title,
            "top_ten_tags": tags,
            "quotes": self.get_quotes(response),
        }

        next_page_button_link = response.xpath(
            '//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs=item
            )
        else:
            # Without a second page the item would otherwise never be emitted.
            yield item

    @staticmethod
    def get_quotes(response):
        """Return the page's quotes as a list of ``{"quote", "author"}`` dicts."""
        new_quotes = response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()').getall()

        new_authors = response.xpath(
            '//div[@class="quote"]//small[@class="author" and @itemprop="author"]/text()').getall()

        return [
            {"quote": quote, "author": author}
            for quote, author in zip(new_quotes, new_authors)
        ]

Aquí mi solución al reto:

##no pude poner el código directo porque me tira error por tener enlaces a sitios no seguros

Amigos les dejo todo el file del código con comentarios y con la solución del reto. Espero les sirva mucho. Saludos.!

import scrapy

# Titulo = //h1/a/text()
# Citas = //span[@class = "text" and @itemprop = "text"]/text()
# Top ten tags = //div[contains(@class, "tags-box")]/span[@class = "tag-item"]/a/text()
# Autores = //div[@class="quote"]//small[@class = "author" and @itemprop = "author"]/text()

# Next page button = //ul[@class="pager"]/li[@class="next"]/a/@href

# inherist from scrapy.Spider
class QuotesScraper(scrapy.Spider):
   """Spider that exports the site title, its top tags and every [quote, author] pair."""

   # Unique name Scrapy uses to refer to this spider inside the project.
   name = "quotes"
   # URLs the crawl starts from.
   start_urls = [
      "https://quotes.toscrape.com/"
   ]

   # Settings applied to this spider; with the feed settings below,
   # "scrapy crawl quotes" saves the yielded items without needing "-o quotes.json".
   custom_settings = {
      'FEED_URI': 'quotes.json',  # output file name
      'FEED_FORMAT': 'json',  # output format

      # Number of requests Scrapy performs at the same time.
      'CONCURRENT_REQUESTS': 24,

      # Amount of RAM Scrapy is allowed to use.
      'MEMUSAGE_LIMIT_MB': 2048,

      # Who to notify when the memory limit is exceeded.
      'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],

      # Whether to obey robots.txt; keep it True whenever possible.
      'ROBOTSTXT_OBEY': True,

      # HTTP header that tells the site who is making the request.
      'USER_AGENT': 'fernando',

      # Fixes the encoding of accents, "ñ", etc. in the exported file.
      'FEED_EXPORT_ENCODING': 'utf-8',
   }

   @staticmethod
   def _quote_author_pairs(response):
      """Return the page's quotes as a list of ``[quote, author]`` lists."""
      quotes = response.xpath('//span[@class = "text" and @itemprop = "text"]/text()').getall()
      authors = response.xpath('//div[@class="quote"]//small[@class = "author" and @itemprop = "author"]/text()').getall()
      return [[quote, author] for quote, author in zip(quotes, authors)]

   def parse_only_quotes(self, response, **kwargs):
      """Extend the running pair list with this page and emit it on the last page.

      Args:
         response: Page response exposing ``xpath`` and ``follow``.
         **kwargs: May carry ``quotes_list``, the pairs gathered so far, as
            sent through ``cb_kwargs`` by ``response.follow``; it starts as
            an empty list when missing.

      Yields:
         A follow-up request while a "next" link exists, otherwise one item
         ``{'quotes_list': [...]}``.
      """
      # Default to an empty list so the callback also works without cb_kwargs
      # (the name was previously bound only inside ``if kwargs:``).
      quotes_list = kwargs.get('quotes_list', [])
      quotes_list.extend(self._quote_author_pairs(response))

      next_page_button_link = response.xpath('//ul[@class="pager"]/li[@class="next"]/a/@href').get()
      if next_page_button_link:
         yield response.follow(next_page_button_link,
                                 callback=self.parse_only_quotes,
                                 cb_kwargs = {
                                    'quotes_list': quotes_list
                                 })
      else:
         # No further page: export everything accumulated so far.
         yield {
            'quotes_list': quotes_list
         }

   def parse(self, response):
      """Emit the title and top tags, then start collecting quotes page by page.

      The spider attribute ``top`` (e.g. ``scrapy crawl quotes -a top=3``),
      when set, limits how many tags are kept.
      """
      title = response.xpath('//h1/a/text()').get()
      top_tags = response.xpath('//div[contains(@class, "tags-box")]/span[@class = "tag-item"]/a/text()').getall()

      # ``top`` is None when the argument was never passed on the command line.
      top = getattr(self, 'top', None)
      if top:
         top_tags = top_tags[:int(top)]

      yield {
         'title': title,
         # NOTE(review): key looks like a typo for 'top_ten_tags'; kept as-is
         # to preserve the exported schema.
         'top_ten_ten': top_tags
      }

      next_page_button_link = response.xpath('//ul[@class="pager"]/li[@class="next"]/a/@href').get()
      if next_page_button_link:
         # Follow the relative link and pass the first page's pairs to the
         # quotes-only callback through cb_kwargs.
         yield response.follow(next_page_button_link,
                                 callback=self.parse_only_quotes,
                                 cb_kwargs = {
                                    'quotes_list': self._quote_author_pairs(response)
                                 })


      # inside the project
      # esto lo usamos para llamar a un spider
      # "scrapy crawl quotes" para ver lo que está en parse
      # "scrapy crawl quotes -o quotes.csv" = -o:output

# scrapy no abre el entorno del sitio web
# scrapy shell "https://quotes.toscrape.com/"

Reto:

import scrapy

#Header= '//div[@class="row header-box"]/div[@class="col-md-8"]/h1/a/text()'
#Quotes= '//div[@class="row"]/div[@class="col-md-8"]/div/span[@class="text"]/text()'
#Top_Ten_tags= '//div[@class="row"]/div[@class="col-md-4 tags-box"]/span/a/text()'
#Next_Page_Button= '//div[@class="row"]/div[@class="col-md-8"]/nav/ul/li/a/@href'
#Autor_Quotes= //div[@class="col-md-8"]/div[@class="quote"]/span/small/text()

class QuotesSpider(scrapy.Spider):
    """Spider that exports the title, top tags, and all quotes and authors."""

    name = 'quotes'
    start_urls = ['https://quotes.toscrape.com']

    # Saves the result to a JSON file.
    custom_settings = {
        "FEEDS": {
            "items.json": {
                "format": "json",
                "encoding": "utf-8",
                "store_empty": False,
                "fields": None,
                "indent": 4,
                "item_export_kwargs": {"export_empty_fields": True},
            },
        },
        "ROBOTSTXT_OBEY": True,
    }

    def parse_only_quotes(self, response, **kwargs):
        """Extend the running lists with this page and emit them on the last page.

        Args:
            response: Page response exposing ``xpath`` and ``follow``.
            **kwargs: May carry the ``Quotes`` and ``Autores`` lists gathered
                so far; a missing entry starts as an empty list.

        Yields:
            A follow-up request while a "next" link exists, otherwise one item
            with the ``Quotes`` and ``Authors`` lists.
        """
        # Default to empty lists so the callback also works without cb_kwargs.
        quotes = kwargs.get('Quotes', [])
        authors = kwargs.get('Autores', [])

        quotes.extend(response.xpath('//div[@class="row"]/div[@class="col-md-8"]/div/span[@class="text"]/text()').getall())
        authors.extend(response.xpath('//div[@class="col-md-8"]/div[@class="quote"]/span/small/text()').getall())

        next_page_button = response.xpath('//div[@class="row"]/div[@class="col-md-8"]/nav/ul/li[@class="next"]/a/@href').get()
        if next_page_button:
            yield response.follow(
                next_page_button,
                callback=self.parse_only_quotes,
                cb_kwargs={'Quotes': quotes, 'Autores': authors})
        else:
            yield {'Quotes': quotes, 'Authors': authors}

    def parse(self, response):
        """Emit the title and top tags, then start collecting quotes and authors."""
        titulo = response.xpath('//div[@class="row header-box"]/div[@class="col-md-8"]/h1/a/text()').get()
        quotes = response.xpath('//div[@class="row"]/div[@class="col-md-8"]/div/span[@class="text"]/text()').getall()
        top_ten_tags = response.xpath('//div[@class="row"]/div[@class="col-md-4 tags-box"]/span/a/text()').getall()
        authors = response.xpath('//div[@class="col-md-8"]/div[@class="quote"]/span/small/text()').getall()

        yield {
            'Titulo': titulo, 'Top_Ten_Tags': top_ten_tags
        }

        next_page_button = response.xpath('//div[@class="row"]/div[@class="col-md-8"]/nav/ul/li[@class="next"]/a/@href').get()
        if next_page_button:
            yield response.follow(
                next_page_button,
                callback=self.parse_only_quotes,
                cb_kwargs={'Quotes': quotes, 'Autores': authors})
    

Hola, comparto mi solución al reto.
En mi caso utilice la funcionalidad de python: zip()

Mi código quedo de la siguiente manera:

    def parse_only_quotes(self, response, **kwargs):
        """Extend the running lists with this page and emit the pairs at the end.

        Only the pairs found on the current page are appended to
        ``quotes_authors``. Zipping the cumulative ``quotes``/``authors``
        lists on every page re-added all earlier pairs and produced
        duplicates.

        Args:
            response: Page response exposing ``xpath`` and ``follow``.
            **kwargs: May carry ``quotes``, ``authors`` and ``quotes_authors``
                gathered so far; a missing entry starts as an empty list.

        Yields:
            A follow-up request while a "next" link exists, otherwise one item
            ``{'quotes_authors': [(quote, author), ...]}``.
        """
        quotes = kwargs.get('quotes', [])
        authors = kwargs.get('authors', [])
        quotes_authors = kwargs.get('quotes_authors', [])

        page_quotes = response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()').getall()
        page_authors = response.xpath(
            '//span/small[@class="author" and @itemprop="author"]/text()').getall()

        quotes.extend(page_quotes)
        authors.extend(page_authors)
        quotes_authors.extend(zip(page_quotes, page_authors))

        next_page_button_link = response.xpath(
            '//ul[@class="pager"]//li[@class="next"]/a/@href').get()

        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors, 'quotes_authors': quotes_authors})
        else:
            yield {
                'quotes_authors': quotes_authors
            }

    def parse(self, response):
        """Emit the title and top tags, then hand the first page's quotes onward.

        The spider attribute ``top``, when set, limits how many tags are kept.
        While a "next" link exists, a follow-up request carries the quotes,
        the authors and their pairs to ``parse_only_quotes``.
        """
        page_title = response.xpath('//h1/a/text()').get()
        quotes = response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()').getall()
        authors = response.xpath(
            '//span/small[@class="author" and @itemprop="author"]/text()').getall()
        tags = response.xpath(
            '//div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()').getall()
        pairs = [(quote, author) for quote, author in zip(quotes, authors)]

        limit = getattr(self, 'top', None)
        if limit:
            tags = tags[:int(limit)]

        yield dict(title=page_title, top_tags=tags)

        next_href = response.xpath(
            '//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if not next_href:
            return

        carried = {'quotes': quotes, 'authors': authors, 'quotes_authors': pairs}
        yield response.follow(next_href, callback=self.parse_only_quotes, cb_kwargs=carried)

El output obtenido es el siguiente:

En mi código intenté que la lista contuviera una serie de tuplas con la cita y el autor; sin embargo, aunque usé la palabra tuple para la conversión, en el archivo .json se siguieron mostrando como un conjunto de listas dentro de una lista. Si alguien sabe cómo hacerlo, agradeceré mucho que lo comparta.

Una cadena que define el nombre de esta araña. El nombre de la araña es cómo Scrapy localiza (y crea una instancia) la araña, por lo que debe ser único. Sin embargo, nada le impide crear más de una instancia de la misma araña. Este es el atributo de araña más importante y es obligatorio.

Para las spiders, el ciclo de raspado pasa por algo como esto:

Comienza generando las solicitudes iniciales para rastrear las primeras URL y especifica una función de devolución de llamada a la que se llamará con la respuesta descargada de esas solicitudes.

Las primeras solicitudes a realizar se obtienen llamando al método start_requests(), que (por defecto) genera un Request para cada URL especificada en start_urls, con el método parse como función de devolución de llamada para esas solicitudes.

En la función de devolución de llamada se analiza la respuesta (página web) y se devuelven objetos de elementos (items), objetos Request o un iterable de estos objetos. Esas solicitudes también contendrán una devolución de llamada (tal vez la misma); Scrapy las descargará y su respuesta será manejada por la devolución de llamada especificada.

En las funciones de devolución de llamada, analizas el contenido de la página, normalmente usando Selectores (pero también puedes usar BeautifulSoup, lxml o cualquier mecanismo que prefieras) y generas elementos con los datos analizados.

Por último, los elementos devueltos por la araña normalmente se conservarán en una base de datos (en algún Item Pipeline) o se escribirán en un archivo mediante las exportaciones de feeds (Feed exports).

Ahora tengo un nuevo super poder!!!

Para lograr el reto añadi un argumento a cb_kwargs cambiando el response.follow del parse inicial

response.follow(next_page_button_link, callback = self.parse_only_quotes, cb_kwargs = {'quotes':quotes, 'authors':authors})

Luego en el segundo parse:

  1. añadir el valor de los autores de la misma manera que el profesor añadió el valor de las citas
  2. Unir cada frase con su autor mediante un ciclo for; acá se pueden hacer los ajustes de formato que quieras: yo eliminé los “ ” de cada frase y añadí el autor separado por un espacio.

esta función (generador en realidad) quedaría así

    def parse_only_quotes(self, response, **kwargs):
        """Extend the running lists and emit "quote author" strings on the last page.

        Args:
            response: Page response exposing ``xpath`` and ``follow``.
            **kwargs: May carry the ``quotes`` and ``authors`` lists gathered
                so far; a missing entry starts as an empty list.

        Yields:
            A follow-up request while a "next" link exists, otherwise one item
            ``{'quotes_authors': [...]}`` where each entry is the quote and
            its author joined by a space, with the typographic quotation
            marks removed.
        """
        # Default to empty lists so the callback also works without cb_kwargs.
        quotes = kwargs.get('quotes', [])
        authors = kwargs.get('authors', [])

        quotes.extend(response.xpath('//span[@class = "text" and @itemprop = "text"]/text()').getall())
        authors.extend(response.xpath('//div[@class="quote"]//small[@class="author"]//text()').getall())

        next_page_button_link = response.xpath('//ul[@class = "pager"]//li[@class = "next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors})
        else:
            # Build the joined strings only once, when they are emitted,
            # instead of rebuilding the whole list on every page.
            quotes_authors = [
                f'{quote} {author}'.replace('“', '').replace('”', '')
                for quote, author in zip(quotes, authors)
            ]
            yield {
                'quotes_authors': quotes_authors
            }

Hola amigos les comparto mi solución, el método que más sufrió modificación fue parse_only_quotes.

def parse_only_quotes(self, response, **kwargs):
    """Extend the running list of quote/author dicts and emit it on the last page.

    Args:
        response: Page response exposing ``xpath`` and ``follow``.
        **kwargs: May carry ``quotes``, the ``{'quote', 'author'}`` dicts
            gathered so far; it starts as an empty list when missing.

    Yields:
        A follow-up request while a "next" link exists, otherwise one item
        ``{'quotes': [...]}``.
    """
    quotes_with_author = kwargs.get('quotes', [])

    quotes = response.xpath('//div[@class="quote"]/span[@class="text"]/text()').getall()
    # The expression previously started with '////', which is not valid XPath.
    authors = response.xpath('//div[@class="quote"]//span/small/text()').getall()
    quotes_with_author.extend(
        {'quote': quote, 'author': author}
        for quote, author in zip(quotes, authors)
    )

    next_page_button_link = response.xpath('//li[@class="next"]/a/@href').get()
    # Check whether there is a next page.
    if next_page_button_link:
        yield response.follow(
            next_page_button_link,
            callback=self.parse_only_quotes,
            cb_kwargs={'quotes': quotes_with_author})
    else:
        yield {
            'quotes': quotes_with_author
        }

Mi resultado final es el siguiente:

Reto

import scrapy

# Título = //h1/a/text()
# Citas = //span[@class="text" and @itemprop="text"]/text()
# Top ten tags = //div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()
# Next page button = //ul[@class="pager"]//li[@class="next"]/a/@href
# Author = //span/small[@class="author" and @itemprop="author"]/text()

class QuotesSpider(scrapy.Spider):
    """Spider that exports the title, top tags and every "quote by author" string."""

    name = 'quotes'
    start_urls = [
        'https://quotes.toscrape.com/'
    ]
    custom_settings = {
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        # Was misspelled 'CONCURRECT_REQUESTS', so the value never applied.
        'CONCURRENT_REQUESTS': 24,
        'MEMUSAGE_LIMIT_MB': 2048,
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],
        'ROBOTSTXT_OBEY': True,
        'USER_AGENT': 'pepito',
        'FEED_EXPORT_ENCODING': 'utf-8'
    }

    def parse_only_quotes(self, response, **kwargs):
        """Extend the running lists and emit "quote by author" strings at the end.

        Args:
            response: Page response exposing ``xpath`` and ``follow``.
            **kwargs: May carry the ``quotes`` and ``authors`` lists gathered
                so far; a missing entry starts as an empty list.

        Yields:
            A follow-up request while a "next" link exists, otherwise one item
            ``{'quotes': [...]}``.
        """
        # Default to empty lists so the callback also works without cb_kwargs.
        quotes = kwargs.get('quotes', [])
        authors = kwargs.get('authors', [])

        quotes.extend(response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall())
        authors.extend(response.xpath('//span/small[@class="author" and @itemprop="author"]/text()').getall())

        next_page_button_link = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors})
        else:
            yield {
                'quotes': [
                    f'{quote} by {author}'
                    for quote, author in zip(quotes, authors)
                ]
            }

    def parse(self, response):
        """Emit the title and top tags, then start collecting quotes and authors.

        The spider attribute ``top``, when set, limits how many tags are kept.
        """
        title = response.xpath('//h1/a/text()').get()
        quotes = response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall()
        top_tags = response.xpath('//div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()').getall()
        authors = response.xpath('//span/small[@class="author" and @itemprop="author"]/text()').getall()

        top = getattr(self, 'top', None)
        if top:
            top_tags = top_tags[:int(top)]

        yield {
            'title': title,
            'top_tags': top_tags
        }

        next_page_button_link = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors})

Mi solución de autores:

[
    {
        "title": "Quotes to Scrape",
        "top_tags": [
            "love",
            "inspirational",
            "life",
            "humor",
            "books",
            "reading",
            "friendship"
        ]
    },
    {
        "quotes": [
            {
                "quote": "“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”",
                "author": "Albert Einstein"
            },
            {
                "quote": "“It is our choices, Harry, that show what we truly are, far more than our abilities.”",
                "author": "J.K. Rowling"
            },
	...
	]
    }
]

Funciones de parse:

def parse_only_quotes(self, response, **kwargs):
        """Extend the running lists and emit quote/author dicts on the last page.

        Args:
            response: Page response exposing ``xpath`` and ``follow``.
            **kwargs: May carry the ``quotes`` and ``authors`` lists gathered
                so far; a missing entry starts as an empty list.

        Yields:
            A follow-up request while a "next" link exists, otherwise one item
            ``{'quotes': [{'quote': ..., 'author': ...}, ...]}``.
        """
        # Default to empty lists so the callback also works without cb_kwargs.
        quotes = kwargs.get('quotes', [])
        authors = kwargs.get('authors', [])

        quotes.extend(response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall())
        authors.extend(response.xpath('//div[@class="quote"]//small[@class="author" and @itemprop="author"]/text()').getall())

        next_page_button_link = response.xpath('//ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(next_page_button_link, callback=self.parse_only_quotes, cb_kwargs={'quotes': quotes, 'authors': authors})
        else:
            yield {
                'quotes': [{'quote': q, 'author': a} for q, a in zip(quotes, authors)]
            }


    def parse(self, response):
        """Emit the title and top tags, then hand the first page's quotes onward.

        The spider attribute ``top``, when set, limits how many tags are kept.
        While a "next" link exists, a follow-up request carries the quotes and
        authors of this page to ``parse_only_quotes``.
        """
        page_title = response.xpath('//h1/a/text()').get()
        first_quotes = response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall()
        first_authors = response.xpath('//div[@class="quote"]//small[@class="author" and @itemprop="author"]/text()').getall()
        tags = response.xpath('//div[contains(@class, "tags-box")]/span[@class="tag-item"]/a[@class="tag"]/text()').getall()

        # ``top`` is read from the spider object; None means it was never set.
        limit = getattr(self, 'top', None)
        if limit:
            # Slice: keep the tags from the start up to the requested index.
            tags = tags[:int(limit)]

        yield dict(title=page_title, top_tags=tags)

        next_href = response.xpath('//ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if not next_href:
            return

        carried = {'quotes': first_quotes, 'authors': first_authors}
        yield response.follow(next_href, callback=self.parse_only_quotes, cb_kwargs=carried)

RETO:

import scrapy

class QuotesSpider(scrapy.Spider):
    """Spider that exports the title, top tags and every (quote, author) pair."""

    name = 'quotes'
    start_urls = [
        'LINK'
    ]
    custom_settings = {
        'FEEDS': {
            'quotes.json': {
                'format': 'json',
                'encoding': 'utf8',
                'fields': ['title', 'quotes', 'top_tags'],
                'overwrite': True
            }
        },
        'CONCURRENT_REQUESTS': 24,
        'MEMUSAGE_LIMIT_MB': 1024,
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],
        'ROBOTSTXT_OBEY': True,
        'USER_AGENT': 'Mozilla/5.0'
    }

    def parse_only_quotes(self, response, **kwargs):
        """Extend the running pair list with this page and emit it on the last page.

        Args:
            response: Page response exposing ``xpath`` and ``follow``.
            **kwargs: May carry ``quotes``, the (quote, author) pairs gathered
                so far; it starts as an empty list when missing.

        Yields:
            A follow-up request while a "next" link exists, otherwise one item
            ``{'quotes': [...]}``.
        """
        # Default to an empty list so the callback also works without cb_kwargs.
        quotes = kwargs.get('quotes', [])

        page_quotes = response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall()
        page_authors = response.xpath('//small[@class="author" and @itemprop="author"]/text()').getall()
        quotes.extend(zip(page_quotes, page_authors))

        next_page_button_link = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes})
        else:
            yield {
                'quotes': quotes
            }

    def parse(self, response):
        """Emit the title and top tags, then start collecting quote/author pairs.

        The spider attribute ``top`` (passed with ``-a top=<int>``), when set,
        limits how many tags are kept.
        """
        title = response.xpath('//h1/a/text()').get()
        top_tags = response.xpath('//div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()').getall()
        quotes = response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall()
        authors = response.xpath('//small[@class="author" and @itemprop="author"]/text()').getall()

        top = getattr(self, 'top', None)
        if top:
            top_tags = top_tags[:int(top)]

        yield {
            'title': title,
            'top_tags': top_tags
        }

        next_page_button_link = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': list(zip(quotes, authors))})

Reto

Complementar las frases o citas con el nombre del autor.

El nombre del autor de cada cita se encuentra en las etiquetas small que tiene como clase “author”:

Prueba de la expresión XPath en la consola Scrapy para la primera página:

Implementación en el programa:

import scrapy

# Título de la web = //h1/a/text()
# Frases: //span[@class="text" and @itemprop="text"]/text()
# Top ten tags = //div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()
# Botón Next = //ul[@class="pager"]//li[@class="next"]/a/@href
# Autores: //small[@class="author"]/text()

class QuotesSpider(scrapy.Spider):
    """Scrape quotes.toscrape.com: title, top tags and every quote with its author.

    ``parse`` handles the first page and emits the title/tags item.
    ``parse_only_quotes`` walks the remaining pages, carrying the quotes and
    authors collected so far along through ``cb_kwargs``.
    """

    name = 'quotes'  # unique spider name
    start_urls = [
        'https://quotes.toscrape.com/'
    ]
    custom_settings = {
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        'CONCURRENT_REQUESTS': 24,  # number of requests Scrapy performs at once
        'MEMUSAGE_LIMIT_MB': 2048,  # maximum RAM to use
        # E-mail addresses notified when the memory limit is exceeded.
        # NOTE(review): the address below looks like an obfuscation
        # placeholder — replace it with a real one.
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],
        # Respect the rules in the site's robots.txt. The key was previously
        # misspelled 'ROBOTSTXT_OBE', which Scrapy does not recognise.
        'ROBOTSTXT_OBEY': True,
        'USER_AGENT': 'User101',  # HTTP header that identifies this client
        'FEED_EXPORT_ENCODING': 'utf-8',  # export characters correctly
    }

    def parse_only_quotes(self, response, **kwargs):
        """Collect quotes and authors from a follow-up page.

        Args:
            response: Page response to extract from.
            **kwargs: The ``cb_kwargs`` dictionary sent with the follow
                request: ``quotes`` and ``authors`` are the lists collected
                so far, extended in place. Missing entries start as empty
                lists.

        Yields:
            A follow request for the next page while one exists, otherwise
            a single item whose ``'quotes + authors'`` list alternates
            quote n, author n.
        """
        # Default to fresh lists so a call without cb_kwargs does not hit
        # an unbound local.
        quotes = kwargs.get('quotes')
        if quotes is None:
            quotes = []
        authors = kwargs.get('authors')
        if authors is None:
            authors = []

        # Append the quotes and authors found on the new page.
        quotes.extend(response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall())
        authors.extend(response.xpath('//small[@class="author"]/text()').getall())

        next_page_button_link = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            # cb_kwargs: dictionary of keyword arguments for the callback.
            yield response.follow(next_page_button_link, callback=self.parse_only_quotes, cb_kwargs={'quotes': quotes, 'authors': authors})
        else:
            # Interleave each quote with the author at the same position.
            quotes_authors = [x for y in zip(quotes, authors) for x in y]

            yield {
                'quotes + authors': quotes_authors,  # [quote n, author n]
            }

    def parse(self, response):
        """Parse the first page: emit title and top tags, then follow pagination.

        The optional spider attribute ``top`` limits how many tags are kept.
        Quotes are not emitted here; they are forwarded to
        ``parse_only_quotes`` with the follow request.
        """
        title = response.xpath('//h1/a/text()').get()
        quotes = response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall()
        top_tags = response.xpath('//div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()').getall()
        authors = response.xpath('//small[@class="author"]/text()').getall()

        # Keep only the top N tags when the spider has a 'top' attribute;
        # getattr returns None when it does not.
        top = getattr(self, 'top', None)
        if top:
            top_tags = top_tags[:int(top)]

        yield {
            'title': title,
            'top_tags': top_tags
        }

        next_page_button_link = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(next_page_button_link,
                                  callback=self.parse_only_quotes,
                                  cb_kwargs={'quotes': quotes, 'authors': authors})
# Titulo = //h1/a/text()
# Citas = //span[@class="text" and @itemprop= "text"]/text()
# Top ten tags = //div[contains(@class,"tags-box")]//span[@class="tag-item"]/a/text()
# next_page_button = //ul[@class="pager"]//li[@class="next"]/a/@href
# autores = response.xpath('//span/small/text()').get()
class quote_spider(scrapy.Spider):
    """Scrape quotes.toscrape.com and export quote/author records as XML.

    The first page yields the title and top tags; the quote records of all
    pages are accumulated through ``cb_kwargs`` and emitted once the last
    page has been reached.
    """

    name = 'quotes'
    start_urls = [
        'https://quotes.toscrape.com/'
    ]
    custom_settings = {
        'FEED_URI': 'quotes.xml',
        'FEED_FORMAT': 'xml'
    }

    @staticmethod
    def _pair_quotes(response):
        """Return the page's quotes as ``{'quote': ..., 'author': ...}`` dicts.

        Quotes and authors are matched by position; if the two lists differ
        in length, the surplus entries of the longer one are ignored.
        """
        texts = response.xpath('//span[@class="text" and @itemprop= "text"]/text()').getall()
        authors = response.xpath('//span/small/text()').getall()
        return [{'quote': text, 'author': author} for text, author in zip(texts, authors)]

    def parse_only_quotes(self, response, **kwargs):
        """Add this page's quote records to the accumulated list.

        Args:
            response: Page response to extract from.
            **kwargs: ``quotes_with_author`` is the list of records
                collected so far, extended in place; it defaults to an
                empty list when absent.

        Yields:
            A follow request for the next page while one exists, otherwise
            a single ``{'quotes_with_author': [...]}`` item.
        """
        quotes_with_author = kwargs.get('quotes_with_author')
        if quotes_with_author is None:
            quotes_with_author = []

        quotes_with_author.extend(self._pair_quotes(response))

        next_page_button = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()

        if next_page_button:
            yield response.follow(next_page_button, callback=self.parse_only_quotes,
                                  cb_kwargs={'quotes_with_author': quotes_with_author})
        else:
            yield {
                'quotes_with_author': quotes_with_author,
            }

    def parse(self, response):
        """Parse the first page: emit title and top tags, then follow pagination.

        The optional spider attribute ``top`` limits how many tags are kept.
        """
        title = response.xpath('//h1/a/text()').get()
        quotes_with_author = self._pair_quotes(response)

        top_ten = response.xpath('//div[contains(@class,"tags-box")]//span[@class="tag-item"]/a/text()').getall()

        top = getattr(self, 'top', None)
        if top:
            top_ten = top_ten[:int(top)]

        yield {
            'title': title,
            'top_ten': top_ten
        }

        next_page_button = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()

        if next_page_button:
            yield response.follow(next_page_button, callback=self.parse_only_quotes,
                                  cb_kwargs={'quotes_with_author': quotes_with_author})

Acá dejo mi aporte. El link originalmente es http, pero Platzi no permite publicar links http, entonces le adjunté la s; si le quitan esa s al link debe de funcionar. Mi solución consiste en crear una lista de diccionarios para así tener una etiqueta “quotes_with_author” y, dentro de ella, las etiquetas quote y author, y retornarlo en un XML.

def parse_only_quotes(self, response, **kwargs):
    """Collect quotes and authors from a follow-up page.

    Args:
        response: Page response; must provide ``xpath`` and ``follow``.
        **kwargs: ``quotes`` and ``authors`` are the lists collected so
            far, extended in place. Missing entries start as empty lists.

    Yields:
        The result of ``response.follow`` for the next page while a "next"
        link exists, otherwise one ``{'quotes': [...], 'authors': [...]}``
        item.
    """
    # Default to fresh lists so a call without cb_kwargs does not hit an
    # unbound local.
    quotes = kwargs.get('quotes')
    if quotes is None:
        quotes = []
    authors = kwargs.get('authors')
    if authors is None:
        authors = []

    quotes.extend(response.xpath(
        '//span[@class="text" and @itemprop="text"]/text()').getall())
    authors.extend(response.xpath(
        '//div[@class="quote"]//span//small[@class="author"]/text()').getall())

    # Follow the "next" button when the page has one.
    next_page_button_link = response.xpath(
        '//ul[@class="pager"]//li[@class="next"]/a/@href').get()
    if next_page_button_link:
        yield response.follow(next_page_button_link, callback=self.parse_only_quotes, cb_kwargs={'quotes': quotes, 'authors': authors})
    else:
        yield {
            'quotes': quotes,
            'authors': authors
        }
""" Get the information  """

    def parse(self, response):
        """Extract the title, quotes, authors and top tags from the page.

        The extracted values are only bound to local names; this snippet
        neither returns nor yields anything.
        """
        # Page title.
        page_title = response.xpath('//h1/a/text()').get()
        # Quote texts.
        quote_texts = response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall()
        # Author names.
        author_names = response.xpath('//div[@class="quote"]//span//small[@class="author"]/text()').getall()
        # Top ten tags.
        tag_names = response.xpath('//div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()').getall()

Hola! En mi caso quise devolver las citas en un formato JSON, para hacerlo más realista en caso de que alguna web desee usarlo.
Mi JSON quedaría de la siguiente manera:

[
  {
    "title": "Quotes to Scrape",
    "top_tags": [
      "love",
      "inspirational"
    ]
  },
  {
    "quotes": [
      {
        "quote": "“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”",
        "author": "Albert Einstein"
      },
      {
        "quote": "“It is our choices, Harry, that show what we truly are, far more than our abilities.”",
        "author": "J.K. Rowling"
      }
    ]
  }
]

Aquí está el código por si desean revisarlo, la clave está en hacer un “merge” con un for de la cita con el autor, y esto lo hice creando una función aparte.
Espero les sirva!

def parse(self, response):
    """Emit the page title and top tags, then forward the first page's quotes.

    The optional spider attribute ``top`` limits how many tags are kept.
    After the title/tags item, the quotes of this page are merged with
    their authors through ``self.get_full_quotes`` and, when a "next" link
    exists, passed on to ``parse_only_quotes`` via ``cb_kwargs``.
    """
    page_title = response.xpath('//h1/a/text()').get()
    tag_names = response.xpath('//div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()').getall()

    limit = getattr(self, 'top', None)
    if limit:
        tag_names = tag_names[:int(limit)]

    yield {'title': page_title, 'top_tags': tag_names}

    page_quotes = response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall()
    page_authors = response.xpath('//small[@class="author"]/text()').getall()

    # Start the accumulation from an empty list on the first page.
    merged = self.get_full_quotes([], page_quotes, page_authors)

    next_href = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
    if not next_href:
        return
    yield response.follow(next_href, callback=self.parse_only_quotes, cb_kwargs={'quotes': merged})
  

def parse_only_quotes(self, response, **kwargs):
    """Merge this page's quotes with their authors and continue paginating.

    Args:
        response: Page response; must provide ``xpath`` and ``follow``.
        **kwargs: ``quotes`` is the list of quote records collected so far;
            it defaults to an empty list when absent.

    Yields:
        The result of ``response.follow`` for the next page while a "next"
        link exists, otherwise one ``{'quotes': [...]}`` item.
    """
    # Default to a fresh list so a call without cb_kwargs does not hit an
    # unbound local.
    quotes = kwargs.get('quotes')
    if quotes is None:
        quotes = []

    current_quotes = response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall()
    current_authors = response.xpath('//small[@class="author"]/text()').getall()

    quotes = self.get_full_quotes(quotes, current_quotes, current_authors)

    next_page_button_link = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
    if next_page_button_link:
        yield response.follow(next_page_button_link, callback=self.parse_only_quotes, cb_kwargs={'quotes': quotes})
    else:
        yield {
            'quotes': quotes,
        }

def get_full_quotes(self, quotes, current_quotes, current_authors):
    """Append ``{'quote', 'author'}`` records for the current page to ``quotes``.

    Quotes and authors are matched by position. When the two lists differ
    in length nothing is appended. ``quotes`` is modified in place and also
    returned.
    """
    if len(current_quotes) != len(current_authors):
        return quotes

    quotes.extend(
        {'quote': text, 'author': name}
        for text, name in zip(current_quotes, current_authors)
    )
    return quotes

Hola, dejo mi solución; espero les sirva.

from typing import get_args
import scrapy


class QuotesDpider(scrapy.Spider):
    """Scrape quotes.toscrape.com into one item: title, top tags and all quotes.

    ``parse`` stores the title and tags of the first page on the spider and
    starts the pagination; ``parse_quotes_authors`` accumulates the
    quote/author records through ``cb_kwargs`` and emits the combined item
    on the last page.
    """

    # Title and tags captured on the first page; rebound per instance by
    # the setters below.
    title_u = ''
    tags_t = []
    name = 'quotes'
    start_urls = [
        'https://quotes.toscrape.com/'
    ]
    custom_settings = {
        'FEED_URI': 'quotes.json',
        # Key was previously misspelled 'FEED_FROMAT', which Scrapy does not
        # recognise, so the export format was never set.
        'FEED_FORMAT': 'json',
        'CONCURRENT_REQUESTS': 24,
        'MEMUSAGE_LIMIT_MB': 2048,
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],
        'ROBOTSTXT_OBEY': True,
        'USER_AGENT': 'PEPE',
        'FEED_EXPORT_ENCODING': 'utf-8',
    }

    def parse_quotes_authors(self, response, **kwargs):
        """Add this page's quote/author records and continue paginating.

        Args:
            response: Page response to extract from.
            **kwargs: ``quotes`` holds the records collected so far. It is
                copied, not modified, and defaults to empty when absent.

        Yields:
            A follow request for the next page while one exists, otherwise
            the final item with ``'Title'``, ``'Top tgs'`` and ``'quotes'``.
        """
        # Always scrape the page; previously a call without kwargs skipped
        # extraction and then failed on the unbound ``list_full``.
        list_full = list(kwargs.get('quotes', []))
        quotes = response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall()
        authors = response.xpath('//small[@class="author" and @itemprop="author"]/text()').getall()
        list_full += [{'quote': quote, 'author': author} for quote, author in zip(quotes, authors)]

        next_page_link = response.xpath(
            '//ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if next_page_link:
            yield response.follow(
                next_page_link, callback=self.parse_quotes_authors,
                cb_kwargs={
                    'quotes': list_full
                }
            )
        else:
            yield {
                'Title': self.getTitle(),
                'Top tgs': self.getTags(),
                'quotes': list_full
            }

    def parse(self, response):
        """Parse the first page: remember title and tags, then follow pagination.

        The optional spider attribute ``top`` limits how many tags are kept.
        The title is also printed to the console between separator lines.
        """
        print('*' * 10)
        print('\n\n')
        title = response.xpath('//h1/a/text()').get()
        print(f'Titulo: {title}')
        quotes = response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()').getall()
        authors = response.xpath(
            '//small[@class="author" and @itemprop="author"]/text()').getall()
        list_qa = [{'quote': quote, 'author': author} for quote, author in zip(quotes, authors)]
        tags = response.xpath(
            '//div[contains(@class,"tags-box")]//span[@class = "tag-item"]/a/text()').getall()
        top = getattr(self, 'top', None)
        if top:
            tags = tags[:int(top)]
        print('\n\n')
        print('*' * 10)
        # Keep title and tags on the spider so the final item can include them.
        self.setTags(tags)
        self.setTitle(title)
        next_page_link = response.xpath(
            '//ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if next_page_link:
            yield response.follow(
                next_page_link, callback=self.parse_quotes_authors,
                cb_kwargs={
                    'quotes': list_qa
                }
            )

    def setTags(self, tags):
        """Store the top tags on this spider instance."""
        self.tags_t = tags

    def getTags(self):
        """Return the stored top tags."""
        return self.tags_t

    def setTitle(self, title):
        """Store the page title on this spider instance."""
        self.title_u = title

    def getTitle(self):
        """Return the stored page title."""
        return self.title_u

y este es mi output

{
    "Title": "Quotes to Scrape",
    "Top tgs": [
        "love",
        "inspirational",
        "life",
        "humor",
        "books"
    ],
    "quotes": [
        {
            "quote": "“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”",
            "author": "Albert Einstein"
        },
        {
            "quote": "“It is our choices, Harry, that show what we truly are, far more than our abilities.”",
            "author": "J.K. Rowling"
        },
        {
            "quote": "“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”",
            "author": "Albert Einstein"
        },
        {
            "quote": "“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”",
            "author": "Jane Austen"
        },
        {
            "quote": "“Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.”",
            "author": "Marilyn Monroe"
        },
        {
            "quote": "“Try not to become a man of success. Rather become a man of value.”",
            "author": "Albert Einstein"
        },
        {
            "quote": "“It is better to be hated for what you are than to be loved for what you are not.”",
            "author": "André Gide"
        },
        {
            "quote": "“I have not failed. I've just found 10,000 ways that won't work.”",
            "author": "Thomas A. Edison"
        },
        {
            "quote": "“A woman is like a tea bag; you never know how strong it is until it's in hot water.”",
            "author": "Eleanor Roosevelt"
        },
        {
            "quote": "“A day without sunshine is like, you know, night.”",
            "author": "Steve Martin"
        },
        {
            "quote": "“This life is what you make it. No matter what, you're going to mess up sometimes, it's a universal truth. But the good part is you get to decide how you're going to mess it up. Girls will be your friends - they'll act like it anyway. But just remember, some come, some go. The ones that stay with you through everything - they're your true best friends. Don't let go of them. Also remember, sisters make the best friends in the world. As for lovers, well, they'll come and go too. And baby, I hate to say it, most of them - actually pretty much all of them are going to break your heart, but you can't give up because if you give up, you'll never find your soulmate. You'll never find that half who makes you whole and that goes for everything. Just because you fail once, doesn't mean you're gonna fail at everything. Keep trying, hold on, and always, always, always believe in yourself, because if you don't, then who will, sweetie? So keep your head high, keep your chin up, and most importantly, keep smiling, because life's a beautiful thing and there's so much to smile about.”",
            "author": "Marilyn Monroe"
        },
        {
            "quote": "“It takes a great deal of bravery to stand up to our enemies, but just as much to stand up to our friends.”",
            "author": "J.K. Rowling"
        },
        {
            "quote": "“If you can't explain it to a six year old, you don't understand it yourself.”",
            "author": "Albert Einstein"
        },
        {
            "quote": "“You may not be her first, her last, or her only. She loved before she may love again. But if she loves you now, what else matters? She's not perfect—you aren't either, and the two of you may never be perfect together but if she can make you laugh, cause you to think twice, and admit to being human and making mistakes, hold onto her and give her the most you can. She may not be thinking about you every second of the day, but she will give you a part of her that she knows you can break—her heart. So don't hurt her, don't change her, don't analyze and don't expect more than she can give. Smile when she makes you happy, let her know when she makes you mad, and miss her when she's not there.”",
            "author": "Bob Marley"
        },
        {
            "quote": "“I like nonsense, it wakes up the brain cells. Fantasy is a necessary ingredient in living.”",
            "author": "Dr. Seuss"
        },
        {
            "quote": "“I may not have gone where I intended to go, but I think I have ended up where I needed to be.”",
            "author": "Douglas Adams"
        },
        {
            "quote": "“The opposite of love is not hate, it's indifference. The opposite of art is not ugliness, it's indifference. The opposite of faith is not heresy, it's indifference. And the opposite of life is not death, it's indifference.”",
            "author": "Elie Wiesel"
        },
        {
            "quote": "“It is not a lack of love, but a lack of friendship that makes unhappy marriages.”",
            "author": "Friedrich Nietzsche"
        },
        {
            "quote": "“Good friends, good books, and a sleepy conscience: this is the ideal life.”",
            "author": "Mark Twain"
        },
        {
            "quote": "“Life is what happens to us while we are making other plans.”",
            "author": "Allen Saunders"
        },
        {
            "quote": "“I love you without knowing how, or when, or from where. I love you simply, without problems or pride: I love you in this way because I do not know any other way of loving but this, in which there is no I or you, so intimate that your hand upon my chest is my hand, so intimate that when I fall asleep your eyes close.”",
            "author": "Pablo Neruda"
        },
        {
            "quote": "“For every minute you are angry you lose sixty seconds of happiness.”",
            "author": "Ralph Waldo Emerson"
        },
        {
            "quote": "“If you judge people, you have no time to love them.”",
            "author": "Mother Teresa"
        },
        {
            "quote": "“Anyone who thinks sitting in church can make you a Christian must also think that sitting in a garage can make you a car.”",
            "author": "Garrison Keillor"
        },
        {
            "quote": "“Beauty is in the eye of the beholder and it may be necessary from time to time to give a stupid or misinformed beholder a black eye.”",
            "author": "Jim Henson"
        },
        {
            "quote": "“Today you are You, that is truer than true. There is no one alive who is Youer than You.”",
            "author": "Dr. Seuss"
        },
        {
            "quote": "“If you want your children to be intelligent, read them fairy tales. If you want them to be more intelligent, read them more fairy tales.”",
            "author": "Albert Einstein"
        },
        {
            "quote": "“It is impossible to live without failing at something, unless you live so cautiously that you might as well not have lived at all - in which case, you fail by default.”",
            "author": "J.K. Rowling"
        },
        {
            "quote": "“Logic will get you from A to Z; imagination will get you everywhere.”",
            "author": "Albert Einstein"
        },
        {
            "quote": "“One good thing about music, when it hits you, you feel no pain.”",
            "author": "Bob Marley"
        },
        {
            "quote": "“The more that you read, the more things you will know. The more that you learn, the more places you'll go.”",
            "author": "Dr. Seuss"
        },
        {
            "quote": "“Of course it is happening inside your head, Harry, but why on earth should that mean that it is not real?”",
            "author": "J.K. Rowling"
        },
        {
            "quote": "“The truth is, everyone is going to hurt you. You just got to find the ones worth suffering for.”",
            "author": "Bob Marley"
        },
        {
            "quote": "“Not all of us can do great things. But we can do small things with great love.”",
            "author": "Mother Teresa"
        },
        {
            "quote": "“To the well-organized mind, death is but the next great adventure.”",
            "author": "J.K. Rowling"
        },
        {
            "quote": "“All you need is love. But a little chocolate now and then doesn't hurt.”",
            "author": "Charles M. Schulz"
        },
        {
            "quote": "“We read to know we're not alone.”",
            "author": "William Nicholson"
        },
        {
            "quote": "“Any fool can know. The point is to understand.”",
            "author": "Albert Einstein"
        },
        {
            "quote": "“I have always imagined that Paradise will be a kind of library.”",
            "author": "Jorge Luis Borges"
        },
        {
            "quote": "“It is never too late to be what you might have been.”",
            "author": "George Eliot"
        },
        {
            "quote": "“A reader lives a thousand lives before he dies, said Jojen. The man who never reads lives only one.”",
            "author": "George R.R. Martin"
        },
        {
            "quote": "“You can never get a cup of tea large enough or a book long enough to suit me.”",
            "author": "C.S. Lewis"
        },
        {
            "quote": "“You believe lies so you eventually learn to trust no one but yourself.”",
            "author": "Marilyn Monroe"
        },
        {
            "quote": "“If you can make a woman laugh, you can make her do anything.”",
            "author": "Marilyn Monroe"
        },
        {
            "quote": "“Life is like riding a bicycle. To keep your balance, you must keep moving.”",
            "author": "Albert Einstein"
        },
        {
            "quote": "“The real lover is the man who can thrill you by kissing your forehead or smiling into your eyes or just staring into space.”",
            "author": "Marilyn Monroe"
        },
        {
            "quote": "“A wise girl kisses but doesn't love, listens but doesn't believe, and leaves before she is left.”",
            "author": "Marilyn Monroe"
        },
        {
            "quote": "“Only in the darkness can you see the stars.”",
            "author": "Martin Luther King Jr."
        },
        {
            "quote": "“It matters not what someone is born, but what they grow to be.”",
            "author": "J.K. Rowling"
        },
        {
            "quote": "“Love does not begin and end the way we seem to think it does. Love is a battle, love is a war; love is a growing up.”",
            "author": "James Baldwin"
        },
        {
            "quote": "“There is nothing I would not do for those who are really my friends. I have no notion of loving people by halves, it is not my nature.”",
            "author": "Jane Austen"
        },
        {
            "quote": "“Do one thing every day that scares you.”",
            "author": "Eleanor Roosevelt"
        },
        {
            "quote": "“I am good, but not an angel. I do sin, but I am not the devil. I am just a small girl in a big world trying to find someone to love.”",
            "author": "Marilyn Monroe"
        },
        {
            "quote": "“If I were not a physicist, I would probably be a musician. I often think in music. I live my daydreams in music. I see my life in terms of music.”",
            "author": "Albert Einstein"
        },
        {
            "quote": "“If you only read the books that everyone else is reading, you can only think what everyone else is thinking.”",
            "author": "Haruki Murakami"
        },
        {
            "quote": "“The difference between genius and stupidity is: genius has its limits.”",
            "author": "Alexandre Dumas fils"
        },
        {
            "quote": "“He's like a drug for you, Bella.”",
            "author": "Stephenie Meyer"
        },
        {
            "quote": "“There is no friend as loyal as a book.”",
            "author": "Ernest Hemingway"
        },
        {
            "quote": "“When one door of happiness closes, another opens; but often we look so long at the closed door that we do not see the one which has been opened for us.”",
            "author": "Helen Keller"
        },
        {
            "quote": "“Life isn't about finding yourself. Life is about creating yourself.”",
            "author": "George Bernard Shaw"
        },
        {
            "quote": "“That's the problem with drinking, I thought, as I poured myself a drink. If something bad happens you drink in an attempt to forget; if something good happens you drink in order to celebrate; and if nothing happens you drink to make something happen.”",
            "author": "Charles Bukowski"
        },
        {
            "quote": "“You don’t forget the face of the person who was your last hope.”",
            "author": "Suzanne Collins"
        },
        {
            "quote": "“Remember, we're madly in love, so it's all right to kiss me anytime you feel like it.”",
            "author": "Suzanne Collins"
        },
        {
            "quote": "“To love at all is to be vulnerable. Love anything and your heart will be wrung and possibly broken. If you want to make sure of keeping it intact you must give it to no one, not even an animal. Wrap it carefully round with hobbies and little luxuries; avoid all entanglements. Lock it up safe in the casket or coffin of your selfishness. But in that casket, safe, dark, motionless, airless, it will change. It will not be broken; it will become unbreakable, impenetrable, irredeemable. To love is to be vulnerable.”",
            "author": "C.S. Lewis"
        },
        {
            "quote": "“Not all those who wander are lost.”",
            "author": "J.R.R. Tolkien"
        },
        {
            "quote": "“Do not pity the dead, Harry. Pity the living, and, above all those who live without love.”",
            "author": "J.K. Rowling"
        },
        {
            "quote": "“There is nothing to writing. All you do is sit down at a typewriter and bleed.”",
            "author": "Ernest Hemingway"
        },
        {
            "quote": "“Finish each day and be done with it. You have done what you could. Some blunders and absurdities no doubt crept in; forget them as soon as you can. Tomorrow is a new day. You shall begin it serenely and with too high a spirit to be encumbered with your old nonsense.”",
            "author": "Ralph Waldo Emerson"
        },
        {
            "quote": "“I have never let my schooling interfere with my education.”",
            "author": "Mark Twain"
        },
        {
            "quote": "“I have heard there are troubles of more than one kind. Some come from ahead and some come from behind. But I've bought a big bat. I'm all ready you see. Now my troubles are going to have troubles with me!”",
            "author": "Dr. Seuss"
        },
        {
            "quote": "“If I had a flower for every time I thought of you...I could walk through my garden forever.”",
            "author": "Alfred Tennyson"
        },
        {
            "quote": "“Some people never go crazy. What truly horrible lives they must lead.”",
            "author": "Charles Bukowski"
        },
        {
            "quote": "“The trouble with having an open mind, of course, is that people will insist on coming along and trying to put things in it.”",
            "author": "Terry Pratchett"
        },
        {
            "quote": "“Think left and think right and think low and think high. Oh, the thinks you can think up if only you try!”",
            "author": "Dr. Seuss"
        },
        {
            "quote": "“What really knocks me out is a book that, when you're all done reading it, you wish the author that wrote it was a terrific friend of yours and you could call him up on the phone whenever you felt like it. That doesn't happen much, though.”",
            "author": "J.D. Salinger"
        },
        {
            "quote": "“The reason I talk to myself is because I’m the only one whose answers I accept.”",
            "author": "George Carlin"
        },
        {
            "quote": "“You may say I'm a dreamer, but I'm not the only one. I hope someday you'll join us. And the world will live as one.”",
            "author": "John Lennon"
        },
        {
            "quote": "“I am free of all prejudice. I hate everyone equally. ”",
            "author": "W.C. Fields"
        },
        {
            "quote": "“The question isn't who is going to let me; it's who is going to stop me.”",
            "author": "Ayn Rand"
        },
        {
            "quote": "“′Classic′ - a book which people praise and don't read.”",
            "author": "Mark Twain"
        },
        {
            "quote": "“Anyone who has never made a mistake has never tried anything new.”",
            "author": "Albert Einstein"
        },
        {
            "quote": "“A lady's imagination is very rapid; it jumps from admiration to love, from love to matrimony in a moment.”",
            "author": "Jane Austen"
        },
        {
            "quote": "“Remember, if the time should come when you have to make a choice between what is right and what is easy, remember what happened to a boy who was good, and kind, and brave, because he strayed across the path of Lord Voldemort. Remember Cedric Diggory.”",
            "author": "J.K. Rowling"
        },
        {
            "quote": "“I declare after all there is no enjoyment like reading! How much sooner one tires of any thing than of a book! -- When I have a house of my own, I shall be miserable if I have not an excellent library.”",
            "author": "Jane Austen"
        },
        {
            "quote": "“There are few people whom I really love, and still fewer of whom I think well. The more I see of the world, the more am I dissatisfied with it; and every day confirms my belief of the inconsistency of all human characters, and of the little dependence that can be placed on the appearance of merit or sense.”",
            "author": "Jane Austen"
        },
        {
            "quote": "“Some day you will be old enough to start reading fairy tales again.”",
            "author": "C.S. Lewis"
        },
        {
            "quote": "“We are not necessarily doubting that God will do the best for us; we are wondering how painful the best will turn out to be.”",
            "author": "C.S. Lewis"
        },
        {
            "quote": "“The fear of death follows from the fear of life. A man who lives fully is prepared to die at any time.”",
            "author": "Mark Twain"
        },
        {
            "quote": "“A lie can travel half way around the world while the truth is putting on its shoes.”",
            "author": "Mark Twain"
        },
        {
            "quote": "“I believe in Christianity as I believe that the sun has risen: not only because I see it, but because by it I see everything else.”",
            "author": "C.S. Lewis"
        },
        {
            "quote": "“The truth.\" Dumbledore sighed. \"It is a beautiful and terrible thing, and should therefore be treated with great caution.”",
            "author": "J.K. Rowling"
        },
        {
            "quote": "“I'm the one that's got to die when it's time for me to die, so let me live my life the way I want to.”",
            "author": "Jimi Hendrix"
        },
        {
            "quote": "“To die will be an awfully big adventure.”",
            "author": "J.M. Barrie"
        },
        {
            "quote": "“It takes courage to grow up and become who you really are.”",
            "author": "E.E. Cummings"
        },
        {
            "quote": "“But better to get hurt by the truth than comforted with a lie.”",
            "author": "Khaled Hosseini"
        },
        {
            "quote": "“You never really understand a person until you consider things from his point of view... Until you climb inside of his skin and walk around in it.”",
            "author": "Harper Lee"
        },
        {
            "quote": "“You have to write the book that wants to be written. And if the book will be too difficult for grown-ups, then you write it for children.”",
            "author": "Madeleine L'Engle"
        },
        {
            "quote": "“Never tell the truth to people who are not worthy of it.”",
            "author": "Mark Twain"
        },
        {
            "quote": "“A person's a person, no matter how small.”",
            "author": "Dr. Seuss"
        },
        {
            "quote": "“... a mind needs books as a sword needs a whetstone, if it is to keep its edge.”",
            "author": "George R.R. Martin"
        }
    ]
}

El código de mi solución:

import scrapy

#Titulo: //h1/a/text()
#Citas: //span[@class="text" and @itemprop="text"]/text()
#Tags: //div[contains(@class, "tags-box")]/span[@class="tag-item"]/a/text()
#Next page button: //ul[@class="pager"]//li[@class="next"]/a/@href


class QuotesSpider(scrapy.Spider):
    """Spider for quotes.toscrape.com.

    The first page yields the site title and the top tags; every page
    contributes its quotes and authors to two lists that travel between
    callbacks through ``cb_kwargs`` and are emitted once the last page has
    been reached.
    """

    name = 'quotes'
    start_urls = [
        'https://quotes.toscrape.com/'
    ]
    custom_settings = {
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        'FEED_EXPORT_ENCODING': 'utf-8',
        # Optional tuning, currently disabled:
        #'CONCURRENT_REQUESTS': 24,
        #'MEMUSAGE_LIMIT_MB': 2048,
        #'MEMUSAGE_NOTIFY_MAIL' : ['jrguevaral@gmail.com'],
        'USER_AGENT': 'JaviGuevara'
    }

    # XPath expressions shared by both callbacks
    _QUOTES_XPATH = '//span[@class="text" and @itemprop="text"]/text()'
    _AUTHORS_XPATH = '//div[@class="quote"]/span/small/text()'
    _NEXT_PAGE_XPATH = '//ul[@class="pager"]//li[@class="next"]/a/@href'
    # The second argument of contains() must be a quoted string literal;
    # unquoted, XPath reads it as a child-element test that evaluates to ""
    # and the predicate matches every div that has a class attribute.
    _TAGS_XPATH = '//div[contains(@class, "tags-box")]/span[@class="tag-item"]/a/text()'

    def parse_only_quotes(self, response, **kwargs):
        """Collect quotes and authors from a follow-up page.

        Args:
            response: Response of the page being processed.
            **kwargs: ``quotes`` and ``authors`` lists accumulated so far,
                delivered by Scrapy from the request's ``cb_kwargs``.

        Yields:
            A request for the next page while a "next" link exists,
            otherwise a single item with all quotes and authors.
        """
        # Fall back to fresh lists so the callback never hits unbound
        # locals when it is invoked without cb_kwargs.
        quotes = kwargs.get('quotes', [])
        authors = kwargs.get('authors', [])
        quotes.extend(response.xpath(self._QUOTES_XPATH).getall())
        authors.extend(response.xpath(self._AUTHORS_XPATH).getall())

        next_page_button_link = response.xpath(self._NEXT_PAGE_XPATH).get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )
        else:
            yield {
                'quote': quotes,
                'author': authors
            }

    def parse(self, response):
        """Process the first page.

        Yields the title and top tags (limited to the first ``top`` entries
        when the spider has a truthy ``top`` attribute), then follows the
        "next" link, handing this page's quotes and authors to
        ``parse_only_quotes``.
        """
        title = response.xpath('//h1/a/text()').get()
        quotes = response.xpath(self._QUOTES_XPATH).getall()
        authors = response.xpath(self._AUTHORS_XPATH).getall()
        top_tags = response.xpath(self._TAGS_XPATH).getall()

        top = getattr(self, 'top', None)
        if top:
            top_tags = top_tags[:int(top)]

        yield {
            'title': title,
            'top_ten_tags': top_tags
        }

        next_page_button_link = response.xpath(self._NEXT_PAGE_XPATH).get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )

Acá va el reto, linda oportunidad para usar el zip de Python 😃

class QuotesSpider(scrapy.Spider):
    """Spider that accumulates title, tags and quote/author pairs over all pages.

    The ``*_XPATH`` names used by the helpers are not defined in this
    class; they are expected to exist at module level.
    """

    def parse_only_quotes(self, response, **kwargs):
        """Append this page's quotes to the carried data and continue or emit.

        ``kwargs`` is the dict built by ``parse``; it is forwarded unchanged
        as ``cb_kwargs`` to the next page and yielded as the item on the
        last page.
        """
        page_quotes = self.get_quotes(response)
        collected = kwargs.get('quotes')
        collected.extend(page_quotes)

        next_url = self.get_next_url(response)
        if not next_url:
            yield kwargs
            return
        yield response.follow(next_url,
                              callback=self.parse_only_quotes,
                              cb_kwargs=kwargs)

    def parse(self, response):
        """Build the initial data from the first page and start paginating."""
        data = dict(
            title=self.get_title(response),
            tags=self.get_tags(response),
            quotes=self.get_quotes(response),
        )

        next_url = self.get_next_url(response)
        if not next_url:
            yield data
            return
        yield response.follow(next_url,
                              callback=self.parse_only_quotes,
                              cb_kwargs=data)

    def get_title(self, response):
        """Return the first match of ``TITLES_XPATH``."""
        selection = response.xpath(TITLES_XPATH)
        return selection.get()

    def get_tags(self, response):
        """Return every match of ``TAGS_XPATH``."""
        selection = response.xpath(TAGS_XPATH)
        return selection.getall()

    def get_next_url(self, response):
        """Return the first match of ``NEXT_BUTTON_XPATH`` (the next-page link)."""
        selection = response.xpath(NEXT_BUTTON_XPATH)
        return selection.get()

    def get_quotes(self, response):
        """Return a list of ``{"text", "author"}`` dicts pairing texts with authors."""
        pairs = zip(
            response.xpath(QUOTES_XPATH).getall(),
            response.xpath(AUTHORS_XPATH).getall(),
        )
        paired = []
        for text, author in pairs:
            paired.append(dict(text=text, author=author))
        return paired

Aquí esta mi respuesta al reto 😃

# Pair each quote with its author and turn every pair into a dict.
cb_kwargs = {'quotes': [{'quote': quote, 'author': author} for quote, author in zip(quotes, authors)]}

No quise agregar todo el código, pues es el mismo, solo que con la modificación del argumento kwargs. Básicamente obtuve las citas y los autores por separado y luego los junté en una lista de tuplas con la función zip (cada tupla contiene la cita y su autor). Después usé la función map para convertir cada tupla en un diccionario.

Reto de la clase

import scrapy

# Titulo  = //h1/a/text()
# Citas = //span[@class="text" and @itemprop="text"]/text()
# Top_ten_tags = //div[@class="col-md-4 tags-box"]//span[@class="tag-item"]/a/text()
# Next page button = //div[@class="col-md-8"]//li[@class="next"]//a/@href

class QuotesSpider(scrapy.Spider):
    """Spider for quotes.toscrape.com that exports quotes interleaved with authors.

    The first page yields the title and the top tags. Quotes and authors
    from every page are accumulated through ``cb_kwargs`` and emitted on the
    last page as one flat list ``[quote, author, quote, author, ...]``.
    """

    # Unique name Scrapy uses to refer to this spider within the project
    name = 'quotes'
    start_urls = [
        'http://quotes.toscrape.com/page/1'
    ]

    custom_settings = {
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        'CONCURRENT_REQUESTS': 24,  # Up to 24 requests are performed at once
        'MEMUSAGE_LIMIT_MB': 2048,  # How much RAM the framework may use
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],
        'ROBOTSTXT_OBEY': True,  # Obey robots.txt
        'USER_AGENT': 'PepitoMartinez',  # The site sees this name instead of a browser's
        'FEED_EXPORT_ENCODING': 'utf-8'
    }

    def parse_only_quotes(self, response, **kwargs):
        """Accumulate quotes and authors from follow-up pages.

        Args:
            response: Response of the page being processed.
            **kwargs: The ``cb_kwargs`` dict unpacked by Scrapy; carries the
                ``quotes`` and ``authors`` lists collected so far.

        Yields:
            A request for the next page while one exists, otherwise one
            item ``{'quotes_author': [quote, author, quote, author, ...]}``.
        """
        # Default to fresh lists so the names are always bound
        quotes = kwargs.get('quotes', [])
        authors = kwargs.get('authors', [])
        quotes.extend(response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall())
        authors.extend(response.xpath('//div[@class="quote"]//small[@class="author"]/text()').getall())

        next_page_button_link = response.xpath('//div[@class="col-md-8"]//li[@class="next"]//a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )
        else:
            # Interleave each quote with its author in a single flat list
            quotes_author = [entry for pair in zip(quotes, authors) for entry in pair]
            yield {'quotes_author': quotes_author}

    def parse(self, response):
        """Process the first page: emit title and top tags, then start paginating."""
        title = response.xpath('//h1/a/text()').get()
        # Quotes and authors of the first page
        quotes = response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall()
        authors = response.xpath('//div[@class="quote"]//small[@class="author"]/text()').getall()
        top_tags = response.xpath('//div[@class="col-md-4 tags-box"]//span[@class="tag-item"]/a/text()').getall()

        # If this spider run has an attribute named ``top``, use its value
        # to limit the number of tags; otherwise getattr returns None.
        top = getattr(self, 'top', None)
        if top:
            top_tags = top_tags[:int(top)]

        yield {
            'title': title,
            'top_tags': top_tags
        }

        next_page_button_link = response.xpath('//div[@class="col-md-8"]//li[@class="next"]//a/@href').get()
        if next_page_button_link:
            # cb_kwargs: keyword arguments handed to the callback
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )

'USER_AGENT'
Hay algunas páginas que bloquean el acceso a los scrapers, y es ahí cuando entra el 'USER_AGENT': este enmascara al usuario de cara al sitio web para que no restrinja el acceso de nuestra spider al scrapear la información.
Normalmente, cuando esto ocurre, yo suelo cambiar el user_agent:

'USER_AGENT': 'Mozilla/5.0'

Para más información acá un artículo

import scrapy

# Titulo = //h1/a/text()
# Citas = //span[@class="text" and @itemprop="text"]/text()
# Top Ten Tags = //div[contains(@class, "tags-box")]/span[@class="tag-item"]/a/text()
# next page button = //ul[@class="pager"]/li[@class="next"]/a/@href

class QuotesSpider(scrapy.Spider):
    """Spider for quotes.toscrape.com that exports (quote, author) pairs.

    The first page yields the title and top tags; quotes and authors are
    accumulated across pages and emitted as a list of pairs on the last one.
    """

    name = 'quotes'
    # A list, as Scrapy documents for start_urls (the original used a set)
    start_urls = [
        'http://quotes.toscrape.com'
    ]
    custom_settings = {
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        'CONCURRENT_REQUESTS': 24,
        'MEMUSAGE_LIMIT_MB': 2048,
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],
        'ROBOTSTXT_OBEY': True,
        'USER_AGENT': 'PepitoMartinez',
        'FEED_EXPORT_ENCODING': 'utf-8'
    }

    def parse_only_quotes(self, response, **kwargs):
        """Accumulate quotes and authors from follow-up pages.

        Args:
            response: Response of the page being processed.
            **kwargs: ``quotes`` and ``autor`` lists carried via ``cb_kwargs``.

        Yields:
            A request for the next page, or on the last page one item
            ``{"Quotes + autor": [(quote, author), ...]}``.
        """
        # Default to fresh lists so the names are always bound
        quotes = kwargs.get('quotes', [])
        autor = kwargs.get('autor', [])
        quotes.extend(response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall())
        autor.extend(response.xpath('//span/small[@class="author" and @itemprop="author"]/text()').getall())

        next_page_button_link = response.xpath('//ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'autor': autor},
            )
        else:
            # Pair up only once, when the result is actually emitted
            yield {
                "Quotes + autor": list(zip(quotes, autor))
            }

    def parse(self, response):
        """Process the first page: emit title and top tags, then paginate.

        If there is no next page, the pairs found on this page are emitted
        directly.
        """
        titulo = response.xpath('//h1/a/text()').get()
        top_tags = response.xpath('//div[contains(@class, "tags-box")]/span[@class="tag-item"]/a/text()').getall()
        quotes = response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall()
        autor = response.xpath('//span/small[@class="author" and @itemprop="author"]/text()').getall()

        top = getattr(self, 'top', None)
        if top:
            top_tags = top_tags[:int(top)]

        yield {
            'Titulo': titulo,
            'Top Tags': top_tags
        }

        next_page_button_link = response.xpath('//ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'autor': autor},
            )
        else:
            yield {
                "Quotes + autor": list(zip(quotes, autor))
            }
import scrapy

#Titulo = //h1/a/text()
#Citas = //span[@class="text" and @itemprop="text"]/text()
#top ten = '//div[contains(@class,"tags-box")]//span[@class="tag-item"]/a/text()
#Author = '//div[@class="quote"]//small[@class="author" and @itemprop="author"]/text()'
#Next page button = '//ul[@class="pager"]//li[@class="next"]/a/@href'


class QuotesSpider(scrapy.Spider):
    """Spider for quotes.toscrape.com that exports "<quote> by <author>" strings.

    The first page yields the title and top tags; quotes and authors are
    accumulated across pages and merged on the last page.
    """

    name = 'quotes'
    start_urls = [
        # The paste had a placeholder here; the XPaths below target this site
        'http://quotes.toscrape.com/'
    ]

    custom_settings = {
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        'CONCURRENT_REQUESTS': 24,
        # The memory settings are spelled MEMUSAGE_*; a misspelled key is
        # not a setting Scrapy reads.
        'MEMUSAGE_LIMIT_MB': 2848,
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],
        'ROBOTSTXT_OBEY': True,
        'USER_AGENT': 'PepitoMartinez',
        'FEED_EXPORT_ENCODING': 'utf-8'
    }

    def parse_only_quotes(self, response, **kwargs):
        """Accumulate quotes and authors from follow-up pages.

        Args:
            response: Response of the page being processed.
            **kwargs: ``quotes`` and ``authors`` lists carried via ``cb_kwargs``.

        Yields:
            A request for the next page, or on the last page one item
            ``{'quotes': ['<quote> by <author>', ...]}``.
        """
        # Default to fresh lists so the names are always bound
        quotes = kwargs.get('quotes', [])
        authors = kwargs.get('authors', [])
        quotes.extend(response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall())
        authors.extend(response.xpath('//small[@class="author"]/text()').getall())

        next_page_button_link = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )
        else:
            yield {
                'quotes': [f'{quote} by {author}' for quote, author in zip(quotes, authors)]
            }

    def parse(self, response):
        """Process the first page: emit title and top tags, then start paginating."""
        title = response.xpath('//h1/a/text()').get()
        quotes = response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall()
        authors = response.xpath('//small[@class="author"]/text()').getall()
        top_tags = response.xpath('//div[contains(@class,"tags-box")]//span[@class="tag-item"]/a/text()').getall()

        top = getattr(self, 'top', None)
        if top:
            top_tags = top_tags[:int(top)]

        yield {
            'title': title,
            'top tags': top_tags
        }

        next_page_button_link = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )

comparto mi codigo:

import scrapy

# Titulo = //h1/a/text()
# citas = //span[@class="text" and @itemprop = "text"]/text()
# top ten tags = //div[contains(@class,"tags-box")]//span[@class="tag-item"]/a/text()
# Next page button = //ul[@class="pager"]//li[@class="next"]/a/@href'
class QhotesSpider(scrapy.Spider):
    """Spider for quotes.toscrape.com that exports "<author>==> <quote>" strings.

    The first page yields the title and top tags; the merged strings of
    every page are accumulated through ``cb_kwargs`` and emitted on the last
    page.
    """

    name = "quotes"
    start_urls = ["http://quotes.toscrape.com/"]

    custom_settings = {
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        'CONCURRENT_REQUESTS': 24,
        'MEMUSAGE_LIMIT_MB': 2048,
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],
        'ROBOTSTXT_OBEY': True,
        'USER_AGENT': 'PepitoMartinez',
        'FEED_EXPORT_ENCODING': 'utf-8'
    }

    @staticmethod
    def _merge_page(response):
        """Return the page's quotes merged with their authors as strings."""
        quotes = response.xpath('//span[@class="text" and @itemprop = "text"]/text()').getall()
        author = response.xpath('//span/small[@class="author"]/text()').getall()
        return [f'{name}==> {quote}' for name, quote in zip(author, quotes)]

    def parse_only_quotes(self, response, **kwargs):
        """Accumulate merged author/quote strings from follow-up pages.

        Args:
            response: Response of the page being processed.
            **kwargs: ``quotes`` holds the strings collected so far.

        Yields:
            A request for the next page, or on the last page one item
            ``{'quotes': [...]}`` with every collected string.
        """
        # Default to a fresh list so the name is always bound
        combinacion = kwargs.get('quotes', [])
        combinacion.extend(self._merge_page(response))

        next_page_button_link = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(next_page_button_link, callback=self.parse_only_quotes, cb_kwargs={'quotes': combinacion})
        else:
            yield {
                'quotes': combinacion
            }

    def parse(self, response):
        """Process the first page: emit title and top tags, then start paginating."""
        title = response.xpath('//h1/a/text()').get()
        top_tags = response.xpath('//div[contains(@class,"tags-box")]//span[@class="tag-item"]/a/text()').getall()

        top = getattr(self, 'top', None)
        if top:
            top_tags = top_tags[:int(top)]

        yield {
            'title': title,
            'top_ten_tags': top_tags
        }

        combinacion = self._merge_page(response)

        next_page_button_link = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(next_page_button_link, callback=self.parse_only_quotes, cb_kwargs={'quotes': combinacion})

Desde Scrapy 2.1.0 (2020-04-24), FEED_URI y FEED_FORMAT están obsoletos y debe implementarse FEEDS. Sin embargo no encuentro cómo hacer la actualización requerida. Me ayudan?

import scrapy

# Titulo = //h1/a/text()
# Citas = //span[@class="text" and @itemprop="text"]/text()
# Top ten tags = //div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()
# Next page button = '//ul[@class="pager"]//li[@class="next"]/a/@href'
# Author = '//small[@class="author"]/text()'

class QuotesSpider(scrapy.Spider):
    """Spider for quotes.toscrape.com that exports quotes and authors as two lists.

    The first page yields the title and top tags; quotes and authors are
    accumulated across pages through ``cb_kwargs`` and emitted on the last
    page.
    """

    name = 'quotes'
    start_urls = [
        'http://quotes.toscrape.com/'
    ]
    custom_settings = {
        # NOTE: FEED_URI / FEED_FORMAT are deprecated since Scrapy 2.1.
        # The replacement is the FEEDS setting, e.g.
        #   'FEEDS': {'quotes.json': {'format': 'json'}}
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        'CONCURRENT_REQUESTS': 24,
        'MEMUSAGE_LIMIT_MB': 2048,
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],
        'ROBOTSTXT_OBEY': True,
        'USER_AGENT': 'JuansitoPiroelectrico',
        'FEED_EXPORT_ENCODING': 'utf-8'
    }

    def parse_only_quotes(self, response, **kwargs):
        """Accumulate quotes and authors from follow-up pages.

        Args:
            response: Response of the page being processed.
            **kwargs: ``quotes`` and ``author`` lists carried via ``cb_kwargs``.

        Yields:
            A request for the next page, or on the last page one item with
            the complete ``quotes`` and ``author`` lists.
        """
        # Default to fresh lists so the names are always bound
        quotes = kwargs.get('quotes', [])
        author = kwargs.get('author', [])
        quotes.extend(response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall())
        author.extend(response.xpath('//small[@class="author"]/text()').getall())

        next_page_button_link = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'author': author},
            )
        else:
            yield {
                'quotes': quotes,
                'author': author
            }

    def parse(self, response):
        """Process the first page: emit title and top tags, then start paginating."""
        title = response.xpath('//h1/a/text()').get()
        quotes = response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall()
        author = response.xpath('//small[@class="author"]/text()').getall()
        top_tags = response.xpath('//div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()').getall()

        top = getattr(self, 'top', None)
        if top:
            top_tags = top_tags[:int(top)]

        yield {
            'title': title,
            'top_tags': top_tags
        }

        next_page_button_link = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'author': author},
            )

Esta es mi solución al reto, pero tengo dudas porque tuve que utilizar css y no xpath, ya que con xpath no me funcionaba.

import scrapy


class QuoteSpider(scrapy.Spider):
    """Spider for quotes.toscrape.com that emits one item per quote.

    The first page additionally yields the site title and the top tags.
    Each quote is exported as its own ``{'author', 'text'}`` item, so no
    state has to be carried between pages.
    """

    name = 'quotes'
    start_urls = [
        'http://quotes.toscrape.com/'
    ]
    custom_settings = {
        # For CSV output use 'quotes.csv' / 'csv' in the two settings below
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        'CONCURRENT_REQUESTS': 24,
        'MEMUSAGE_LIMIT_MB': 2048,
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],
        'ROBOTSTXT_OBEY': True,
        'USER_AGENT': 'Franco97',
        'FEED_EXPORT_ENCODING': 'utf-8'
    }

    def parse_only_quotes(self, response):
        """Yield every quote on the page, then follow the "next" link if any."""
        # NOTE: the XPath equivalent needs *relative* paths inside the loop,
        # e.g. quote.xpath('.//small[@class="author"]/text()'). A path that
        # starts with '//' searches the whole document again and returns
        # the first author/text of the page for every quote.
        for quote in response.css("div.quote"):
            yield {
                'author': quote.css("small.author::text").get(),
                'text': quote.css("span.text::text").get(),
            }

        next_page_button_link = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(next_page_button_link, callback=self.parse_only_quotes)

    def parse(self, response):
        """Process the first page: emit title and top tags, then its quotes.

        If the spider has a truthy ``top`` attribute, only the first ``top``
        tags are kept.
        """
        title = response.xpath('//h1/a/text()').get()
        top_tags = response.xpath('//div[contains(@class, "tags-box")]/span[@class="tag-item"]/a/text()').getall()
        top = getattr(self, 'top', None)
        if top:
            top_tags = top_tags[:int(top)]
        yield {
            'title': title,
            'top_tags': top_tags,
        }
        # Quote extraction and pagination are the same as on later pages
        yield from self.parse_only_quotes(response)

Un dato de vital importancia: según la documentación, tanto MEMUSAGE_LIMIT_MB como MEMUSAGE_NOTIFY_MAIL no funcionan en Windows.

Cumpliendo el reto 🚀

import scrapy

#Título://h1/a/text()
#Citas://span[@class="text" and @itemprop="text"]/text()
#autor: //small[@class="author" and @itemprop="author"]/text()
#Top Ten tags://div[contains(@class,"tags-box")]//span[@class="tag-item"]/a/text()
#nextpagebuton =//ul[@class="pager"]//li[@class="next"]/a/@href
 
class QuotesSpider(scrapy.Spider):
    """Spider for quotes.toscrape.com that exports quotes and authors as two lists.

    The first page yields the title and top tags; quotes and authors are
    accumulated across pages through ``cb_kwargs`` and emitted on the last
    page.
    """

    name = 'quotes'
    start_urls = [
        'http://quotes.toscrape.com/page/1/'
    ]
    custom_settings = {
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        'CONCURRENT_REQUESTS': 24,
        'MEMUSAGE_LIMIT_MB': 2048,
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],
        'ROBOTSTXT_OBEY': True,
        'USER_AGENT': 'PepitoPerez',
        'FEED_EXPORT_ENCODING': 'utf-8'
    }

    def parse_only_quotes(self, response, **kwargs):
        """Accumulate quotes and authors from follow-up pages.

        Args:
            response: Response of the page being processed.
            **kwargs: ``quotes`` and ``authors`` lists carried via ``cb_kwargs``.

        Yields:
            A request for the next page, or on the last page one item with
            the complete ``quotes`` and ``authors`` lists.
        """
        # Default to fresh lists so the names are always bound
        quotes = kwargs.get('quotes', [])
        authors = kwargs.get('authors', [])
        quotes.extend(response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()').getall())
        authors.extend(response.xpath(
            '//small[@class="author" and @itemprop="author"]/text()').getall())

        next_page_button_link = response.xpath(
            '//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )
        else:
            yield {
                'quotes': quotes,
                'authors': authors
            }

    def parse(self, response):
        """Process the first page: emit title and top tags, then start paginating."""
        title = response.xpath('//h1/a/text()').get()
        quotes = response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()').getall()
        top_tags = response.xpath(
            '//div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()').getall()
        authors = response.xpath(
            '//small[@class="author" and @itemprop="author"]/text()').getall()

        top = getattr(self, 'top', None)
        if top:
            top_tags = top_tags[:int(top)]

        yield {
            'title': title,
            'top_tags': top_tags
        }

        next_page_button_link = response.xpath(
            '//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )

Una pregunta: quiero cotejar datos de precios de productos entre Amazon y MercadoLibre. Si el robots.txt no me impide hacer el scraping, ¿entonces sí se puede realizar? No me queda claro.

Mi código del reto:

def parse_only_quotes(self, response, **kwargs):
        """Accumulate quotes and authors; emit "<quote> --> <author>" on the last page.

        Args:
            response: Response of the page being processed.
            **kwargs: ``quotes`` and ``author`` lists carried via ``cb_kwargs``.

        Yields:
            A request for the next page while a "next" link exists,
            otherwise one item ``{'quotes_authors': [...]}``.
        """
        # Default to fresh lists so the names are always bound, even when
        # the callback is invoked without cb_kwargs.
        quotes = kwargs.get('quotes', [])
        author = kwargs.get('author', [])

        quotes.extend(response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall())
        author.extend(response.xpath('//div[@class="quote"]/span/small[@class="author"]/text()').getall())

        next_page_button_link = response.xpath('//ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(next_page_button_link, callback=self.parse_only_quotes, cb_kwargs={'quotes':quotes, 'author':author})
        else:
            yield {
                'quotes_authors': [f'{quote} --> {name}' for quote, name in zip(quotes, author)]
            }

    def parse(self, response):
        """Emit the first page's title and top tags, then start paginating.

        When the spider has a truthy ``top`` attribute (e.g. passed on the
        command line), only the first ``top`` tags are kept. The quotes and
        authors of this page are handed to ``parse_only_quotes`` through
        ``cb_kwargs`` when a "next" link exists.
        """
        select = response.xpath

        tags = select('//div[contains(@class,"tags-box")]/span[@class="tag-item"]/a/text()').getall()
        limit = getattr(self, 'top', None)
        if limit:
            tags = tags[:int(limit)]

        yield {
            'title': select('//h1/a/text()').get(),
            'top_tags': tags
        }

        carried = {
            'quotes': select('//span[@class="text" and @itemprop="text"]/text()').getall(),
            'author': select('//div[@class="quote"]/span/small[@class="author"]/text()').getall(),
        }
        next_href = select('//ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if not next_href:
            return
        yield response.follow(next_href, callback=self.parse_only_quotes, cb_kwargs=carried)

Un ejemplo del resultado del json:

"“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.” --> Albert Einstein"

Mi propuesta:

import scrapy

# Título: //h1/a/text()
# Citas: //span[@class="text" and @itemprop="text"]/text()
# Autores: //span[not(@class)]/small[@class="author" and @itemprop="author"]/text()
# Top ten tags: //div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()
# Next page button: //ul[@class="pager"]/li[@class="next"]/a/@href


class QuotesSpider(scrapy.Spider):
    """Spider for quotes.toscrape.com that exports ``{'text', 'author'}`` dicts.

    The first page yields the title and top tags; each page's quotes are
    paired with their authors, accumulated through ``cb_kwargs`` and emitted
    as a single list on the last page.
    """

    name = 'quotes'
    start_urls = [
        'http://quotes.toscrape.com/page/1/'
    ]
    custom_settings = {
        'FEED_URI': 'quotes.json',  # Output file of the extraction
        'FEED_FORMAT': 'json',      # Format of the output file
        # Newer Scrapy versions replace the two settings above with FEEDS,
        # which takes a dict: 'FEEDS': {'quotes.json': {'format': 'json'}}
        # Encoding of the output file
        'FEED_EXPORT_ENCODING': 'utf-8',
        # Number of concurrent requests (the setting name ends in "S")
        'CONCURRENT_REQUESTS': 24,
        # Maximum RAM Scrapy may use (useful when spiders run on cloud servers)
        'MEMUSAGE_LIMIT_MB': 2048,
        # Who to notify if the RAM limit is exceeded
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]', '[email protected]', '[email protected]'],
        'ROBOTSTXT_OBEY': True,     # Whether to obey robots.txt
        # The user sent with each request as its owner
        'USER_AGENT': 'PepitoMartinez',
    }

    @staticmethod
    def _page_quotes(response):
        """Return the page's quotes as ``{'text', 'author'}`` dicts."""
        texts = response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()').getall()
        authors = response.xpath(
            '//span[not(@class)]/small[@class="author" and @itemprop="author"]/text()').getall()
        return [{'text': text, 'author': author}
                for text, author in zip(texts, authors)]

    def parse_only_quotes(self, response, **kwargs):
        """Accumulate quote dicts from follow-up pages.

        Args:
            response: Response of the page being processed.
            **kwargs: ``quotes`` holds the dicts collected so far.

        Yields:
            A request for the next page, or on the last page one item
            ``{'quotes': [...]}`` with every collected dict.
        """
        # Default to a fresh list so the name is always bound
        quotes = kwargs.get('quotes', [])
        quotes.extend(self._page_quotes(response))
        next_page_button_link = response.xpath(
            '//ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(next_page_button_link, callback=self.parse_only_quotes, cb_kwargs={'quotes': quotes})
        else:
            yield {
                'quotes': quotes
            }

    def parse(self, response):
        """Process the first page: emit title and top tags, then start paginating."""
        title = response.xpath('//h1/a/text()').get()
        quotes = self._page_quotes(response)

        top_tags = response.xpath(
            '//div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()').getall()
        top = getattr(self, 'top', None)
        if top:
            top_tags = top_tags[:int(top)]
        yield {
            'title': title,
            'top_ten_tags': top_tags,
        }
        next_page_button_link = response.xpath(
            '//ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(next_page_button_link, callback=self.parse_only_quotes, cb_kwargs={'quotes': quotes})

Reto solucionado.
Todo lo que hice fue capturar a los autores y pasarlos como parametro a la función parse_only_quotes, y generar un ciclo en el que se reescriben las citas incluyendo al autor.

def parse_only_quotes(self, response, **kwargs):
        """Accumulate quotes and authors; on the last page append each author to its quote.

        Args:
            response: Response of the page being processed.
            **kwargs: ``quotes`` and ``authors`` lists carried via ``cb_kwargs``.

        Yields:
            A request for the next page while a "next" link exists,
            otherwise one item ``{'quotes': ['<quote> <author>', ...]}``.
        """
        # Default to fresh lists so the names are always bound, even when
        # the callback is invoked without cb_kwargs.
        quotes = kwargs.get('quotes', [])
        authors = kwargs.get('authors', [])
        quotes.extend(response.xpath('//div[@class="quote"]/span[@class="text"]/text()').getall())
        authors.extend(response.xpath('//div[@class="quote"]/span/small[@class="author"]/text()').getall())

        next_page_button_link = response.xpath('//nav/ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(next_page_button_link, callback=self.parse_only_quotes, cb_kwargs={'quotes': quotes, 'authors': authors})
        else:
            # Rewrite the quotes in place; enumerate replaces the manual counter
            for i, (quote, author) in enumerate(zip(quotes, authors)):
                quotes[i] = f'{quote} {author}'
            yield {
                'quotes': quotes
            }

Código:

# Autor = '//small[@class="author"]/text()'

class QuotesSpider(scrapy.Spider):
    """Spider for quotes.toscrape.com that exports (quote, author) pairs.

    The first page yields the title and top tags; quotes and authors are
    accumulated across pages through ``cb_kwargs`` and emitted as a list of
    pairs on the last page.
    """

    name = 'quotes'
    start_urls = [
        'http://quotes.toscrape.com/page/1/'
    ]

    custom_settings = {
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        'FEED_EXPORT_ENCODING': 'utf-8'
    }

    def parse_quotes_and_authors(self, response, **kwargs):
        """Accumulate quotes and authors from follow-up pages.

        Args:
            response: Response of the page being processed.
            **kwargs: ``quotes`` and ``authors`` lists carried via ``cb_kwargs``.

        Yields:
            A request for the next page, or on the last page one item
            ``{'quotes_and_authors': [(quote, author), ...]}``.
        """
        # Default to fresh lists so the names are always bound
        quotes = kwargs.get('quotes', [])
        authors = kwargs.get('authors', [])

        quotes.extend(response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall())
        authors.extend(response.xpath('//small[@class="author"]/text()').getall())

        next_page_button_link = response.xpath(
            '//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_quotes_and_authors,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )
        else:
            # Pair up only once, when the result is actually emitted
            yield {
                'quotes_and_authors': list(zip(quotes, authors))
            }

    def parse(self, response):
        """Process the first page: emit title and top tags, then start paginating."""
        title = response.xpath('//h1/a/text()').get()
        quotes = response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()').getall()
        authors = response.xpath('//small[@class="author"]/text()').getall()
        top_tags = response.xpath(
            '//div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()').getall()

        top = getattr(self, 'top', None)
        if top:
            top_tags = top_tags[:int(top)]

        yield {
            'title': title,
            'top_tags': top_tags
        }

        next_page_button_link = response.xpath(
            '//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_quotes_and_authors,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )

Output:

Después de un largo proceso de pensamiento, logré el resultado esperado con un ciclo for:

def parse_only_quotes(self, response, **kwargs):
    """Collect quotes and authors page by page, then emit labelled quotes.

    Args:
        response: Page response; must provide ``xpath`` and ``follow``.
        **kwargs: Optional ``quotes`` and ``author`` lists carried over from
            earlier pages. They are extended in place; a missing key starts
            from an empty list.

    Yields:
        A follow-up request while a "next" link exists; otherwise one item
        whose ``quotes`` list holds ``'<quote> ===> <author>'`` strings.
    """
    # .get() with a default avoids the UnboundLocalError that the bare
    # ``if kwargs:`` guard produced when no accumulators were passed.
    quotes = kwargs.get('quotes', [])
    author = kwargs.get('author', [])

    # Extend the carried-over lists with this page's values.
    quotes.extend(response.xpath(
        '//span[@class="text" and @itemprop="text"]/text()').getall())
    author.extend(response.xpath(
        '//span/small[@class="author" and @itemprop="author"]/text()').getall())

    next_page_button_link = response.xpath(
        '//ul[@class="pager"]/li[@class="next"]/a/@href').get()
    if next_page_button_link:
        yield response.follow(
            next_page_button_link,
            callback=self.parse_only_quotes,
            cb_kwargs={'quotes': quotes, 'author': author},
        )
    else:
        # zip() stops at the shorter list, so a length mismatch between
        # quotes and authors can no longer raise IndexError.
        for i, (quote, name) in enumerate(zip(quotes, author)):
            quotes[i] = quote + ' ===> ' + name
        yield {
            'quotes': quotes
        }

Un reto interesante que les recomiendo mucho es tratar de obtener, para este caso, las citas y los autores, pero no usando el “next_button”, sino entrando a http://quotes.toscrape.com/scroll, donde la página no tiene esta opción sino que, por el contrario, permite hacer, como muchas otras, un “infinite scroll”. Dejo el código que permite obtener todo de una manera elegante, pero insisto: traten de hacerlo.

import scrapy 
import json

class Quotes_AuthorsScraper(scrapy.Spider):
    """Spider that walks the paginated quotes JSON endpoint page by page."""

    name = 'scroll_quotes'
    start_urls = ['http://quotes.toscrape.com/api/quotes?page=1']

    def parse(self, response):
        """Yield one author/quote item per entry, then request the next page.

        The response body is decoded as JSON; the code reads its
        ``quotes``, ``has_next`` and ``page`` keys.
        """
        payload = json.loads(response.text)

        # One item per quote entry: author name plus quote text.
        yield from (
            {'authors': entry['author']['name'], 'quotes': entry['text']}
            for entry in payload['quotes']
        )

        if not payload['has_next']:
            return
        following = payload['page'] + 1
        yield scrapy.Request(
            f'http://quotes.toscrape.com/api/quotes?page={following}',
            callback=self.parse,
        )



Mi archivo quotes.json: (obviamente solo una, pero que demuestra cada quote!)

{
   "quote": "“The world as we have created it is a process of our thinking. It cannot be changed without 	changing our thinking.”",
    "author": "Albert Einstein"
}

El reto

import scrapy

# Título = //h1/a/text()
# Citas = //span[@class="text" and @itemprop="text"]/text()
# Autores = //small[@class="author" and @itemprop="author"]/text()
# Top ten tags = //div[contains(@class,"tags-box")]//span[@class="tag-item"]/a/text()
# Next page button = //ul[@class="pager"]//li[@class="next"]/a/@href


class QuotesSpider(scrapy.Spider):
    """Spider that collects every quote with its author across all pages.

    The first page additionally yields the site title and the top tags.
    """

    name = 'quotes'
    start_urls = [
        # The original literal had stray spaces inside the URL, which made
        # it an invalid address.
        'http://quotes.toscrape.com/page/1/'
    ]

    custom_settings = {
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        # Scrapy reads CONCURRENT_REQUESTS (plural); the singular spelling
        # used before is not the name of that setting.
        'CONCURRENT_REQUESTS': 24,
        'MEMUSAGE_LIMIT_MB': 2048,
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],
        'ROBOTSTXT_OBEY': True,
        'USER_AGENT': 'CarolinaAcosta',
        'FEED_EXPORT_ENCODING': 'utf-8',
    }

    @staticmethod
    def _quotes_with_authors(response):
        """Return this page's quotes as ``'<quote> by <author>'`` strings."""
        quotes = response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()').getall()
        authors = response.xpath(
            '//small[@class="author" and @itemprop="author"]/text()').getall()
        # zip() stops at the shorter list, so a quote without a matching
        # author cannot raise IndexError.
        return [quote + ' by ' + author for quote, author in zip(quotes, authors)]

    def parse_only_quotes(self, response, **kwargs):
        """Append this page's quotes and continue, or emit them on the last page.

        Args:
            response: Page response.
            **kwargs: Optional ``quotes_authors`` list carried over from
                earlier pages; extended in place. Defaults to an empty list.

        Yields:
            A follow-up request while a "next" link exists; otherwise one
            item with all accumulated ``quotes_authors`` strings.
        """
        # A default avoids the UnboundLocalError of the bare ``if kwargs:``.
        quotes_authors = kwargs.get('quotes_authors', [])
        quotes_authors.extend(self._quotes_with_authors(response))

        next_page_button_link = response.xpath(
            '//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes_authors': quotes_authors},
            )
        else:
            yield {
                'quotes_authors': quotes_authors
            }

    def parse(self, response):
        """Yield the title and top tags, then start collecting quotes.

        The tag list is cut to the first ``top`` entries when the spider has
        a truthy ``top`` attribute.
        """
        title = response.xpath('//h1/a/text()').get()
        quotes_authors = self._quotes_with_authors(response)

        top_tags = response.xpath(
            '//div[contains(@class,"tags-box")]//span[@class="tag-item"]/a/text()').getall()

        top = getattr(self, 'top', None)
        if top:
            top = int(top)
            top_tags = top_tags[:top]

        yield {
            'title': title,
            'top_tags': top_tags
        }

        next_page_button_link = response.xpath(
            '//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes_authors': quotes_authors},
            )

Descripción de las configuraciones del Spider

    custom_settings = {
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        # Allows 24 requests at the same time. The setting name is
        # CONCURRENT_REQUESTS (plural); the singular spelling used before
        # is not the name of that setting.
        'CONCURRENT_REQUESTS': 24,
        # Amount of RAM Scrapy is allowed to use while working.
        'MEMUSAGE_LIMIT_MB': 2048,
        # Notify an administrator if Scrapy goes over the RAM limit.
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],
        'ROBOTSTXT_OBEY': True,  # Obey the robots.txt file.
        # Tells the website who made the request, instead of showing up as
        # Google Chrome, Firefox, etc.
        'USER_AGENT': 'CarolinaAcosta',
        # Keeps accented characters and "ñ" readable in the JSON file.
        'FEED_EXPORT_ENCODING': 'utf-8',
    }

La unión de quotes y author se hizo con zip, pero en un comentario está la opción con un ciclo for:

def parse_only_quotes(self, response, **kwargs):
    """Accumulate quotes and authors, emitting the pairs on the last page.

    When keyword arguments are given, the ``quotes`` and ``author`` lists in
    them are extended in place with this page's values. While a "next" link
    exists a follow-up request carrying both lists is yielded; on the last
    page a single item is yielded whose ``quotes`` entry is the list of
    ``(quote, author)`` tuples.
    """
    if kwargs:
        quotes, author = kwargs['quotes'], kwargs['author']

    quotes.extend(
        response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall()
    )
    author.extend(
        response.xpath('//small[@class="author" and @itemprop="author"]/text()').getall()
    )

    next_href = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()

    if not next_href:
        # Alternative with a for loop (stores quote and author as separate,
        # alternating entries instead of tuples):
        # quotes_author = []
        # for i in range(len(quotes)):
        #     quotes_author.append(quotes[i])
        #     quotes_author.append(author[i])
        yield {'quotes': list(zip(quotes, author))}
        return

    yield response.follow(
        next_href,
        callback=self.parse_only_quotes,
        cb_kwargs={'quotes': quotes, 'author': author},
    )

    def parse(self, response):
        """Scrape the first page and start accumulating quotes and authors.

        Yields an item with the page title and the tag list (cut to the
        first ``top`` entries when the spider has a truthy ``top``
        attribute). If a "next" link exists, it then yields a follow-up
        request that hands this page's quotes and authors to
        ``parse_only_quotes``.
        """
        page_title = response.xpath('//h1/a/text()').get()
        page_quotes = response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall()
        page_authors = response.xpath('//small[@class="author" and @itemprop="author"]/text()').getall()
        tag_names = response.xpath('//div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()').getall()

        # ``top`` is optional and may arrive as a string, hence the int().
        limit = getattr(self, 'top', None)
        if limit:
            tag_names = tag_names[:int(limit)]

        yield {'title': page_title, 'top_tags': tag_names}

        next_href = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if not next_href:
            return
        yield response.follow(
            next_href,
            callback=self.parse_only_quotes,
            cb_kwargs={'quotes': page_quotes, 'author': page_authors},
        )
class QuotesSpider(scrapy.Spider):
    """Spider that gathers all quotes, each labelled with its author.

    The first page additionally yields the site title and the top tags.
    """

    name = 'quotes'
    start_urls = [
        'https://quotes.toscrape.com/page/1/'
    ]
    custom_settings = {
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        'CONCURRENT_REQUESTS': 24,
        'MEMUSAGE_LIMIT_MB': 2048,

        'ROBOTSTXT_OBEY': True,
        'USER_AGENT': 'JoelGaspar',
        'FEED_EXPORT_ENCODING': 'utf-8'
    }

    def parse_only_quotes(self, response, **kwargs):
        """Accumulate quotes and authors; emit labelled quotes on the last page.

        Args:
            response: Page response.
            **kwargs: Optional ``quotes`` and ``authors`` lists carried over
                from earlier pages; extended in place. A missing key starts
                from an empty list.

        Yields:
            A follow-up request while a "next" link exists; otherwise one
            item whose ``quotes_with_author`` list holds
            ``'<quote> Author: <author>'`` strings.
        """
        # Defaults avoid the UnboundLocalError of the bare ``if kwargs:``.
        quotes = kwargs.get('quotes', [])
        authors = kwargs.get('authors', [])
        quotes.extend(response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()').getall())
        authors.extend(response.xpath(
            '//div[@class="quote"]/span/small[@class="author"]/text()').getall())

        next_page_button_link = response.xpath(
            '//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )
        else:
            # zip() stops at the shorter list, so a length mismatch cannot
            # raise IndexError.
            quote_author = [
                quote + ' Author: ' + author
                for quote, author in zip(quotes, authors)
            ]
            yield {
                'quotes_with_author': quote_author
            }

    def parse(self, response):
        """Yield the title and top tags, then start collecting quotes.

        The tag list is cut to the first ``top`` entries when the spider has
        a truthy ``top`` attribute.
        """
        title = response.xpath('//h1/a/text()').get()
        quotes = response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()').getall()
        authors = response.xpath(
            '//div[@class="quote"]/span/small[@class="author"]/text()').getall()
        top_tags = response.xpath(
            '//div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()').getall()

        top = getattr(self, 'top', None)
        if top:
            top = int(top)
            top_tags = top_tags[:top]

        yield {
            'title': title,
            'top_tags': top_tags
        }

        next_page_button_link = response.xpath(
            '//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )

Comparto el código y el resultado del desafío:

Una muestra del resultado:

Usando la misma lógica de esta clase, resolví el reto

import scrapy

# Titulo = //h1/a/text()
# Citas = //span[@class="text" and @itemprop="text"]/text()
# top ten tags = //div[contains(@class, "tags-box")]/span[@class="tag-item"]/a/text()
# Next page button = //ul[@class="pager"]/li[@class="next"]/a/@href
# Author = //small[@class="author" and @itemprop="author"]/text()

class QuotesSpider(scrapy.Spider):
    """Spider that gathers all quotes and all authors as two parallel lists.

    The first page additionally yields the site title and the top tags.
    """

    name = 'quotes'
    start_urls = [
        # NOTE(review): this list was empty, so the spider had nothing to
        # request. The URL below is the one the other spiders for this
        # exercise start from -- confirm it is the intended entry point.
        'http://quotes.toscrape.com/page/1/'
    ]

    custom_settings = {
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        'CONCURRENT_REQUESTS': 24,
        'MEMUSAGE_LIMIT_MB': 2048,
        'ROBOTSTXT_OBEY': True,
        'FEED_EXPORT_ENCODING': 'utf-8',
        'USER_AGENT': 'bobMarley'
    }

    def parse_quotes_and_authors(self, response, **kwargs):
        """Accumulate quotes and authors; emit both lists on the last page.

        Args:
            response: Page response.
            **kwargs: Optional ``quotes`` and ``authors`` lists carried over
                from earlier pages; extended in place. A missing key starts
                from an empty list.

        Yields:
            A follow-up request while a "next" link exists; otherwise one
            item with the accumulated ``quotes`` and ``authors`` lists.
        """
        # Defaults avoid the UnboundLocalError of the bare ``if kwargs:``.
        quotes = kwargs.get('quotes', [])
        authors = kwargs.get('authors', [])
        quotes.extend(response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()').getall())
        authors.extend(response.xpath(
            '//small[@class="author" and @itemprop="author"]/text()').getall())

        next_page_button_link = response.xpath(
            '//ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_quotes_and_authors,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )
        else:
            yield {
                'quotes': quotes,
                'authors': authors
            }

    def parse(self, response):
        """Yield the title and top tags, then start collecting quotes.

        The tag list is cut to the first ``top`` entries when the spider has
        a truthy ``top`` attribute.
        """
        title = response.xpath('//h1/a/text()').get()
        quotes = response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()').getall()
        authors = response.xpath(
            '//small[@class="author" and @itemprop="author"]/text()').getall()
        top_tags = response.xpath(
            '//div[contains(@class, "tags-box")]/span[@class="tag-item"]/a/text()').getall()

        top = getattr(self, 'top', None)
        if top:
            top = int(top)
            top_tags = top_tags[:top]

        yield {
            'title': title,
            'top_tags': top_tags
        }

        next_page_button_link = response.xpath(
            '//ul[@class="pager"]/li[@class="next"]/a/@href').get()

        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_quotes_and_authors,
                cb_kwargs={'quotes': quotes, 'authors': authors},
            )



Comparto mi solución con algunos comentarios:

import scrapy

class QuotesSpider(scrapy.Spider):
    """Spider that gathers every quote paired with its author.

    The XPath expressions are kept as class attributes so both callbacks
    share a single definition of each query.
    """

    name = 'quotes'  # Unique spider name.
    start_urls = [
        'https://quotes.toscrape.com/'
    ]

    custom_settings = {
        'FEED_URI': 'quotes.json',
        # Create the JSON file directly, without passing it on the console.
        'FEED_FORMAT': 'json',
        'CONCURRENT_REQUESTS': 24,  # Number of requests made at the same time.
        'MEMUSAGE_LIMIT_MB': 2048,  # How much RAM may be used.
        # Who gets notified when the RAM limit is reached.
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],
        'ROBOTSTXT_OBEY': True,  # Obey robots.txt.
        'USER_AGENT': 'JuanchoTacorta',  # Identifies the user to the server.
        'FEED_EXPORT_ENCODING': 'utf-8'  # Avoids garbled characters.
    }

    title = '//h1/a/text()'
    quotes = '//span[@class="text" and @itemprop="text"]/text()'
    top_tags = '//div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()'
    next_page_button = '//ul[@class="pager"]//li[@class="next"]/a/@href'
    authors = '//small[@class="author" and @itemprop="author"]/text()'

    def parse_only_quotes(self, response, **kwargs):
        """Accumulate quotes and authors; emit the pairs on the last page.

        Args:
            response: Page response.
            **kwargs: Optional ``quotes`` and ``authors`` lists carried over
                from earlier pages; extended in place. A missing key starts
                from an empty list.

        Yields:
            A follow-up request while a "next" link exists; otherwise one
            item whose ``quotes`` entry is a list of ``(quote, author)``
            tuples.
        """
        # Defaults avoid the UnboundLocalError of the bare ``if kwargs:``.
        quotes = kwargs.get('quotes', [])
        authors = kwargs.get('authors', [])

        # extend() merges the previous pages' values with this page's.
        quotes.extend(response.xpath(self.quotes).getall())
        # Bug fix: the authors were previously appended to ``quotes``, so
        # ``authors`` never grew and the final zip paired the wrong values.
        authors.extend(response.xpath(self.authors).getall())

        # Move on to the next page.
        next_page_button = response.xpath(self.next_page_button).get()

        if next_page_button:
            yield response.follow(
                next_page_button, callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors})
        else:
            yield {
                'quotes': list(zip(quotes, authors))
            }

    def parse(self, response):
        """Yield the title and top tags, then start collecting quotes."""
        title = response.xpath(self.title).get()
        quotes = response.xpath(self.quotes).getall()
        top_tags = response.xpath(self.top_tags).getall()
        authors = response.xpath(self.authors).getall()

        # Use the spider's ``top`` attribute when it exists, else None.
        top = getattr(self, 'top', None)
        if top:
            top = int(top)
            top_tags = top_tags[:top]  # Keep only the first ``top`` tags.

        yield {
            'title': title,
            'top_tags': top_tags,
        }

        # Continue on the next page, handing over what was collected here.
        next_page_button = response.xpath(self.next_page_button).get()
        if next_page_button:
            yield response.follow(
                next_page_button, callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes, 'authors': authors})

Les comparto mi resultado y mi código usando estructuras For para anexar los autores a cada cita:

Resultado:

{
        "title": "Quotes to Scrape",
        "top_tags": [
            "love",
            "inspirational",
            "life",
            "humor",
            "books",
            "reading",
            "friendship"
        ]
    },
    {
        "quotes": [
            {
                "quote": "“The world as we have created it is a 		 process of our thinking. It cannot be changed without changing 
our thinking.”",
                "author": "Albert Einstein"
            },
            {
                "quote": "“It is our choices, Harry, that show what we truly are, far more than our abilities.”",
                "author": "Albert Einstein"
            },
	    ...
	   {
                "quote": "“... a mind needs books as a sword needs a whetstone, if it is to keep its edge.”",
                "author": "J.K. Rowling"
            }
 	]
    } 

quotes.py:

import scrapy

# Título = //h1/a/text()
# Citas = //span[@class="text" and @itemprop="text"]/text()
# Autor = //small[@class="author" and @itemprop="author"]/text()
# Top ten tags = //div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()
# Next page button = //ul[@class="pager"]/li[@class="next"]/a/@href

class QuotesSpider(scrapy.Spider):
    """Spider that gathers every quote as a ``{'quote', 'author'}`` dict.

    The first page additionally yields the site title and the top tags.
    """

    name = 'quotes'
    start_urls = [
        'https://quotes.toscrape.com/'
    ]
    custom_settings = {
        'FEEDS':
        {
            "quotes.json":
            {
                "format": "json",
                "encoding": "utf-8"
            }
        },
        'CONCURRENT_REQUESTS': 24,
        'MEMUSAGE_LIMIT_MB': 2048,
        'MEMUSAGE_NOTIFY_MAIL': ['your email'],
        'ROBOTSTXT_OBEY': True,
        'USER_AGENT': 'LexTomato'
    }

    @staticmethod
    def _quotes_on_page(response):
        """Return this page's quotes as ``{'quote': ..., 'author': ...}`` dicts."""
        texts = response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()').getall()
        # Bug fix: the author query used ``.get()`` inside the loop, which
        # always returns the first author on the page, so every quote on a
        # page got the same author. Fetch all authors and pair by position.
        authors = response.xpath(
            '//small[@class="author" and @itemprop="author"]/text()').getall()
        return [
            {'quote': text, 'author': author}
            for text, author in zip(texts, authors)
        ]

    def parse_only_quotes(self, response, **kwargs):
        """Append this page's quotes and continue, or emit them on the last page.

        Args:
            response: Page response.
            **kwargs: Optional ``quotes`` list carried over from earlier
                pages; extended in place. Defaults to an empty list.

        Yields:
            A follow-up request while a "next" link exists; otherwise one
            item with all accumulated ``quotes``.
        """
        # A default avoids the UnboundLocalError of the bare ``if kwargs:``.
        quotes = kwargs.get('quotes', [])
        quotes.extend(self._quotes_on_page(response))

        next_page_button_link = response.xpath(
            '//ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes},
            )
        else:
            yield {
                'quotes': quotes
            }

    def parse(self, response):
        """Yield the title and top tags, then start collecting quotes.

        The tag list is cut to the first ``top`` entries when the spider has
        a truthy ``top`` attribute.
        """
        title = response.xpath('//h1/a/text()').get()
        quotes = self._quotes_on_page(response)
        top_tags = response.xpath(
            '//div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()').getall()

        top = getattr(self, 'top', None)
        if top:
            top = int(top)
            top_tags = top_tags[:top]

        yield {
            'title': title,
            'top_tags': top_tags
        }

        next_page_button_link = response.xpath(
            '//ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(
                next_page_button_link,
                callback=self.parse_only_quotes,
                cb_kwargs={'quotes': quotes},
            )

Puedes elegir con -a top=x la cantidad de quotes-autores que deseas ver. Si no pones nada, muestra todos.

    def parse_QA(self, response, **kwargs):
        """Accumulate quotes and authors; emit one item per pair at the end.

        Args:
            response: Page response; must provide ``xpath`` and ``follow``.
            **kwargs: Optional ``Quotes`` and ``Authors`` lists carried over
                from earlier pages; extended in place. A missing key starts
                from an empty list.

        Yields:
            A follow-up request while a "next" link exists; otherwise one
            ``{'Author': ..., 'Quote': ...}`` item per collected pair,
            limited to the first ``top`` pairs when the spider has a truthy
            ``top`` attribute.
        """
        # Defaults avoid the UnboundLocalError of the bare ``if kwargs:``.
        quotes = kwargs.get('Quotes', [])
        authors = kwargs.get('Authors', [])

        quotes.extend(response.xpath('//span[@class="text"]/text()').getall())
        authors.extend(response.xpath('//small/text()').getall())

        next_btn = response.xpath('//li[@class = "next"]/a/@href').get()

        top = getattr(self, 'top', None)
        limit = int(top) if top else None

        if next_btn:
            yield response.follow(
                next_btn,
                callback=self.parse_QA,
                cb_kwargs={'Quotes': quotes, 'Authors': authors},
            )
        else:
            # Bug fix: the flat list was indexed with idx / idx + 1, which
            # only lines up for the first pair; pair by position instead.
            pairs = list(zip(authors, quotes))
            if limit is not None:
                # Slicing tolerates a ``top`` larger than the data, which
                # used to raise IndexError; max() keeps a negative ``top``
                # yielding nothing, as before.
                pairs = pairs[:max(limit, 0)]
            for author, quote in pairs:
                yield {
                    'Author': author,
                    'Quote': quote
                }


    def parse(self, response):
        """Yield the title and main tags, then start collecting quotes.

        The first item carries the page title and the tags selected by the
        ``position()<2`` query. If a "next" link exists, a follow-up request
        hands this page's quotes and authors to ``parse_QA``.
        """
        page_title = response.xpath('//h1/a/text()').get()
        page_quotes = response.xpath('//span[@class="text"]/text()').getall()
        leading_tags = response.xpath('//div[@class="quote"]/div/a[position()<2]/text()').getall()
        page_authors = response.xpath('//small/text()').getall()

        yield {'Title': page_title, 'Main Tags': leading_tags}

        next_href = response.xpath('//li[@class = "next"]/a/@href').get()
        if not next_href:
            return
        yield response.follow(
            next_href,
            callback=self.parse_QA,
            cb_kwargs={'Quotes': page_quotes, 'Authors': page_authors},
        )

Tengo el mismo código que todos, no logro todavía hacer cada cita con su autor, no sé si usar tuplas, me pierdo en la forma de guardar los datos.

def parse_quotes_and_authors(self, response, **kwargs):
    """Accumulate quotes and authors; emit both lists on the last page.

    The pasted snippet had inconsistent indentation (mixed tabs and
    dedented statements) and was not valid Python; this is the same logic
    with the indentation restored.

    Args:
        response: Page response; must provide ``xpath`` and ``follow``.
        **kwargs: Optional ``quotes`` and ``authors`` lists carried over
            from earlier pages; extended in place. A missing key starts
            from an empty list.

    Yields:
        A follow-up request while a "next" link exists; otherwise one item
        with the accumulated ``quotes`` and ``authors`` lists.
    """
    # Defaults avoid the UnboundLocalError of the bare ``if kwargs:``.
    quotes = kwargs.get('quotes', [])
    authors = kwargs.get('authors', [])

    quotes.extend(response.xpath(
        '//span[@class="text" and @itemprop="text"]/text()').getall())
    authors.extend(response.xpath(
        '//small[@class="author" and @itemprop="author"]/text()').getall())

    next_page_button_link = response.xpath(
        '//li[@class="next"]/a/@href').get()
    if next_page_button_link:
        yield response.follow(
            next_page_button_link,
            callback=self.parse_quotes_and_authors,
            cb_kwargs={
                'quotes': quotes,
                'authors': authors,
            },
        )
    else:
        yield {
            'quotes': quotes,
            'authors': authors
        }

def parse(self, response):
    """Hand the first page's quotes and authors to ``parse_quotes_and_authors``.

    NOTE(review): the pasted snippet appears to be abbreviated -- it shows
    only the follow-up request and used ``quotes`` and ``authors`` without
    defining them. They are extracted here so the function runs on its own;
    confirm against the full spider.

    Yields:
        A follow-up request carrying this page's quotes and authors, when a
        "next" link exists.
    """
    quotes = response.xpath(
        '//span[@class="text" and @itemprop="text"]/text()').getall()
    authors = response.xpath(
        '//small[@class="author" and @itemprop="author"]/text()').getall()

    # Only one more key ('authors') had to be added to the dictionary.
    next_page_button_link = response.xpath(
        '//li[@class="next"]/a/@href').get()

    if next_page_button_link:
        yield response.follow(
            next_page_button_link,
            callback=self.parse_quotes_and_authors,
            cb_kwargs={'quotes': quotes, 'authors': authors},
        )


    ```

Resultado:

{
    "quotes": [
      {
        "text": "“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”",
        "author": "Albert Einstein"
      },
      {
        "text": "“It is our choices, Harry, that show what we truly are, far more than our abilities.”",
        "author": "J.K. Rowling"
      }
     ]
}

Code:

    def parse_only_quotes(self, response, **kwargs):
        """Append this page's quote records and continue or emit them.

        When keyword arguments are given, the ``quotes`` list in them is
        extended in place with what ``define_quotes`` builds from this
        page's quote blocks. While a "next" link exists a follow-up request
        carrying the list is yielded; on the last page the list is yielded
        under the ``quotes`` key.
        """
        if kwargs:
            collected = kwargs['quotes']
        quote_blocks = response.xpath('//div[@class="quote"]').getall()
        collected.extend(self.define_quotes(quote_blocks))

        next_href = response.xpath('//ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if not next_href:
            yield {'quotes': collected}
            return
        yield response.follow(
            next_href,
            callback=self.parse_only_quotes,
            cb_kwargs={'quotes': collected},
        )

    def parse(self, response):
        """Yield the title and top tags, then start collecting quote records.

        The tag list is cut to the first ``top`` entries when the spider has
        a truthy ``top`` attribute. If a "next" link exists, a follow-up
        request hands the records built by ``define_quotes`` for this page
        to ``parse_only_quotes``.
        """
        page_title = response.xpath('//h1/a/text()').get()
        tag_names = response.xpath('//div[contains(@class,"tags-box")]/span[@class="tag-item"]/a/text()').getall()
        quote_blocks = response.xpath('//div[@class="quote"]').getall()
        records = self.define_quotes(quote_blocks)

        # ``top`` is optional and may arrive as a string, hence the int().
        limit = getattr(self, 'top', None)
        if limit:
            tag_names = tag_names[:int(limit)]

        yield {'title': page_title, 'top_ten_tags': tag_names}

        next_href = response.xpath('//ul[@class="pager"]/li[@class="next"]/a/@href').get()
        if not next_href:
            return
        yield response.follow(
            next_href,
            callback=self.parse_only_quotes,
            cb_kwargs={'quotes': records},
        )


    def define_quotes(self, xpath_quotes):
        """Build ``{'text', 'author'}`` records from quote HTML fragments.

        Args:
            xpath_quotes: Iterable of HTML strings, one per quote block.

        Returns:
            A list with one dict per fragment, holding the first matching
            quote text and author name (``None`` when a query matches
            nothing).
        """
        quotes = []
        for quote in xpath_quotes:
            # Parse each fragment once and run both queries on the result,
            # instead of building a Selector per query.
            fragment = Selector(text=quote)
            quotes.append({
                'text': fragment.xpath('//span[@class="text"]/text()').get(),
                'author': fragment.xpath('//small[@class="author"]/text()').get(),
            })

        return quotes

Código del spider

import scrapy

# Titulo = //h1/a/text()
# Citas = //span[@class="text" and @itemprop="text"]/text()
# Top ten tags = //div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()
# Next page button = //ul[@class="pager"]//li[@class="next"]/a/@href
# Author = //small[@class="author" and @itemprop="author"]/text()


class QuotesSpider(scrapy.Spider):
    """Spider that scrapes quotes.toscrape.com.

    ``parse`` emits the page title together with the top tags. The quotes
    and authors of every page are accumulated through ``cb_kwargs`` and
    emitted as a single item once the last page has been reached.

    Pass ``-a top=N`` on the command line to keep only the first N tags.
    """

    name = 'quotes'
    start_urls = [
        'https://quotes.toscrape.com/'
    ]
    custom_settings = {
        'FEED_URI': 'quotes.json',
        # Fixed key: it was misspelled 'FEDD_FORMAT', so the JSON format was
        # never actually configured.
        'FEED_FORMAT': 'json',
        'CONCURRENT_REQUESTS': 24,
        'MEMUSAGE_LIMIT_MB': 2048,
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],
        'ROBOTSTXT_OBEY': True,
        'USER_AGENT': 'PepitoMartinez',
        'FEED_EXPORT_ENCODING': 'utf-8'
    }

    @staticmethod
    def _interleave(quotes, author):
        """Return a flat list in which every quote is followed by its author."""
        # zip stops at the shorter list, so unequal lengths no longer raise
        # IndexError as the former index-based loop did.
        return [value for pair in zip(quotes, author) for value in pair]

    def parse_only_quotes(self, response, **kwargs):
        """Collect quotes/authors of a follow-up page and keep paginating.

        ``kwargs`` holds the ``quotes`` and ``author`` lists accumulated on
        the previous pages (passed through ``cb_kwargs``).

        Yields:
            A request for the next page, or on the last page a single
            ``{'quotes': [...]}`` item with quote and author interleaved.
        """
        # Default to empty lists so a call without cb_kwargs does not fail
        # with UnboundLocalError.
        quotes = kwargs.get('quotes', [])
        author = kwargs.get('author', [])

        quotes.extend(response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()').getall())

        author.extend(response.xpath(
            '//small[@class="author" and @itemprop="author"]/text()').getall())

        next_page_button_link = response.xpath(
            '//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(next_page_button_link, callback=self.parse_only_quotes, cb_kwargs={'quotes': quotes, 'author': author})
        else:
            yield {
                'quotes': self._interleave(quotes, author)
            }

    def parse(self, response):
        """Handle the first page: emit title and top tags, then start paging.

        Yields:
            ``{'title': ..., 'top_tags': [...]}`` first, then either a
            request for the next page or, when the site has a single page,
            the quotes item itself.
        """
        title = response.xpath('//h1/a/text()').get()
        quotes = response.xpath(
            '//span[@class="text" and @itemprop="text"]/text()').getall()
        author = response.xpath(
            '//small[@class="author" and @itemprop="author"]/text()').getall()
        top_tags = response.xpath(
            '//div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()').getall()

        top = getattr(self, 'top', None)
        if top:
            # Spider arguments arrive as strings, hence the conversion.
            top = int(top)
            top_tags = top_tags[:top]
        yield {
            'title': title,
            'top_tags': top_tags
        }

        next_page_button_link = response.xpath(
            '//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(next_page_button_link, callback=self.parse_only_quotes, cb_kwargs={'quotes': quotes, 'author': author})
        else:
            # Single-page site: emit the quotes gathered above instead of
            # dropping them.
            yield {
                'quotes': self._interleave(quotes, author)
            }

Comando en el cmd:

scrapy crawl quotes -a top=7

Resultado en JSON:

{
    "title": "Quotes to Scrape",
    "top_tags": [
        "love",
        "inspirational",
        "life",
        "humor",
        "books",
        "reading",
        "friendship"
    ]
},
{
    "quotes": [
        "“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”",
        "Albert Einstein",
        "“It is our choices, Harry, that show what we truly are, far more than our abilities.”",
        "J.K. Rowling",
        "“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”",
        "Albert Einstein",
        "“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”",
        "Jane Austen",
        "“Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.”",
        "Marilyn Monroe",
        "“Try not to become a man of success. Rather become a man of value.”",
        "Albert Einstein",
        "“It is better to be hated for what you are than to be loved for what you are not.”",
        "André Gide",
        "“I have not failed. I've just found 10,000 ways that won't work.”",
        "Thomas A. Edison",
        "“A woman is like a tea bag; you never know how strong it is until it's in hot water.”",
        "Eleanor Roosevelt",
        "“A day without sunshine is like, you know, night.”",
        "Steve Martin",
        "“This life is what you make it. No matter what, you're going to mess up sometimes, it's a universal truth. But the good part is you get to decide how you're going to mess it up. Girls will be your friends - they'll act like it anyway. But just remember, some come, some go. The ones that stay with you through everything - they're your true best friends. Don't let go of them. Also remember, sisters make the best friends in the world. As for lovers, well, they'll come and go too. And baby, I hate to say it, most of them - actually pretty much all of them are going to break your heart, but you can't give up because if you give up, you'll never find your soulmate. You'll never find that half who makes you whole and that goes for everything. Just because you fail once, doesn't mean you're gonna fail at everything. Keep trying, hold on, and always, always, always believe in yourself, because if you don't, then who will, sweetie? So keep your head high, keep your chin up, and most importantly, keep smiling, because life's a beautiful thing and there's so much to smile about.”",
        "Marilyn Monroe",
        "“It takes a great deal of bravery to stand up to our enemies, but just as much to stand up to our friends.”",
        "J.K. Rowling",
        "“If you can't explain it to a six year old, you don't understand it yourself.”",
        "Albert Einstein",
        "“You may not be her first, her last, or her only. She loved before she may love again. But if she loves you now, what else matters? She's not perfect—you aren't either, and the two of you may never be perfect together but if she can make you laugh, cause you to think twice, and admit to being human and making mistakes, hold onto her and give her the most you can. She may not be thinking about you every second of the day, but she will give you a part of her that she knows you can break—her heart. So don't hurt her, don't change her, don't analyze and don't expect more than she can give. Smile when she makes you happy, let her know when she makes you mad, and miss her when she's not there.”",
        "Bob Marley",
        "“I like nonsense, it wakes up the brain cells. Fantasy is a necessary ingredient in living.”",
        "Dr. Seuss",
        "“I may not have gone where I intended to go, but I think I have ended up where I needed to be.”",
        "Douglas Adams",
        "“The opposite of love is not hate, it's indifference. The opposite of art is not ugliness, it's indifference. The opposite of faith is not heresy, it's indifference. And the opposite of life is not death, it's indifference.”",
        "Elie Wiesel",
        "“It is not a lack of love, but a lack of friendship that makes unhappy marriages.”",
        "Friedrich Nietzsche",
        "“Good friends, good books, and a sleepy conscience: this is the ideal life.”",
        "Mark Twain",
        "“Life is what happens to us while we are making other plans.”",
        "Allen Saunders",
        "“I love you without knowing how, or when, or from where. I love you simply, without problems or pride: I love you in this way because I do not know any other way of loving but this, in which there is no I or you, so intimate that your hand upon my chest is my hand, so intimate that when I fall asleep your eyes close.”",
        "Pablo Neruda",
        "“For every minute you are angry you lose sixty seconds of happiness.”",
        "Ralph Waldo Emerson",
        "“If you judge people, you have no time to love them.”",
        "Mother Teresa",
        "“Anyone who thinks sitting in church can make you a Christian must also think that sitting in a garage can make you a car.”",
        "Garrison Keillor",
        "“Beauty is in the eye of the beholder and it may be necessary from time to time to give a stupid or misinformed beholder a black eye.”",
        "Jim Henson",
        "“Today you are You, that is truer than true. There is no one alive who is Youer than You.”",
        "Dr. Seuss",
        "“If you want your children to be intelligent, read them fairy tales. If you want them to be more intelligent, read them more fairy tales.”",
        "Albert Einstein",
        "“It is impossible to live without failing at something, unless you live so cautiously that you might as well not have lived at all - in which case, you fail by default.”",
        "J.K. Rowling",
        "“Logic will get you from A to Z; imagination will get you everywhere.”",
        "Albert Einstein",
        "“One good thing about music, when it hits you, you feel no pain.”",
        "Bob Marley",
        "“The more that you read, the more things you will know. The more that you learn, the more places you'll go.”",
        "Dr. Seuss",
        "“Of course it is happening inside your head, Harry, but why on earth should that mean that it is not real?”",
        "J.K. Rowling",
        "“The truth is, everyone is going to hurt you. You just got to find the ones worth suffering for.”",
        "Bob Marley",
        "“Not all of us can do great things. But we can do small things with great love.”",
        "Mother Teresa",
        "“To the well-organized mind, death is but the next great adventure.”",
        "J.K. Rowling",
        "“All you need is love. But a little chocolate now and then doesn't hurt.”",
        "Charles M. Schulz",
        "“We read to know we're not alone.”",
        "William Nicholson",
        "“Any fool can know. The point is to understand.”",
        "Albert Einstein",
        "“I have always imagined that Paradise will be a kind of library.”",
        "Jorge Luis Borges",
        "“It is never too late to be what you might have been.”",
        "George Eliot",
        "“A reader lives a thousand lives before he dies, said Jojen. The man who never reads lives only one.”",
        "George R.R. Martin",
        "“You can never get a cup of tea large enough or a book long enough to suit me.”",
        "C.S. Lewis",
        "“You believe lies so you eventually learn to trust no one but yourself.”",
        "Marilyn Monroe",
        "“If you can make a woman laugh, you can make her do anything.”",
        "Marilyn Monroe",
        "“Life is like riding a bicycle. To keep your balance, you must keep moving.”",
        "Albert Einstein",
        "“The real lover is the man who can thrill you by kissing your forehead or smiling into your eyes or just staring into space.”",
        "Marilyn Monroe",
        "“A wise girl kisses but doesn't love, listens but doesn't believe, and leaves before she is left.”",
        "Marilyn Monroe",
        "“Only in the darkness can you see the stars.”",
        "Martin Luther King Jr.",
        "“It matters not what someone is born, but what they grow to be.”",
        "J.K. Rowling",
        "“Love does not begin and end the way we seem to think it does. Love is a battle, love is a war; love is a growing up.”",
        "James Baldwin",
        "“There is nothing I would not do for those who are really my friends. I have no notion of loving people by halves, it is not my nature.”",
        "Jane Austen",
        "“Do one thing every day that scares you.”",
        "Eleanor Roosevelt",
        "“I am good, but not an angel. I do sin, but I am not the devil. I am just a small girl in a big world trying to find someone to love.”",
        "Marilyn Monroe",
        "“If I were not a physicist, I would probably be a musician. I often think in music. I live my daydreams in music. I see my life in terms of music.”",
        "Albert Einstein",
        "“If you only read the books that everyone else is reading, you can only think what everyone else is thinking.”",
        "Haruki Murakami",
        "“The difference between genius and stupidity is: genius has its limits.”",
        "Alexandre Dumas fils",
        "“He's like a drug for you, Bella.”",
        "Stephenie Meyer",
        "“There is no friend as loyal as a book.”",
        "Ernest Hemingway",
        "“When one door of happiness closes, another opens; but often we look so long at the closed door that we do not see the one which has been opened for us.”",
        "Helen Keller",
        "“Life isn't about finding yourself. Life is about creating yourself.”",
        "George Bernard Shaw",
        "“That's the problem with drinking, I thought, as I poured myself a drink. If something bad happens you drink in an attempt to forget; if something good happens you drink in order to celebrate; and if nothing happens you drink to make something happen.”",
        "Charles Bukowski",
        "“You don’t forget the face of the person who was your last hope.”",
        "Suzanne Collins",
        "“Remember, we're madly in love, so it's all right to kiss me anytime you feel like it.”",
        "Suzanne Collins",
        "“To love at all is to be vulnerable. Love anything and your heart will be wrung and possibly broken. If you want to make sure of keeping it intact you must give it to no one, not even an animal. Wrap it carefully round with hobbies and little luxuries; avoid all entanglements. Lock it up safe in the casket or coffin of your selfishness. But in that casket, safe, dark, motionless, airless, it will change. It will not be broken; it will become unbreakable, impenetrable, irredeemable. To love is to be vulnerable.”",
        "C.S. Lewis",
        "“Not all those who wander are lost.”",
        "J.R.R. Tolkien",
        "“Do not pity the dead, Harry. Pity the living, and, above all those who live without love.”",
        "J.K. Rowling",
        "“There is nothing to writing. All you do is sit down at a typewriter and bleed.”",
        "Ernest Hemingway",
        "“Finish each day and be done with it. You have done what you could. Some blunders and absurdities no doubt crept in; forget them as soon as you can. Tomorrow is a new day. You shall begin it serenely and with too high a spirit to be encumbered with your old nonsense.”",
        "Ralph Waldo Emerson",
        "“I have never let my schooling interfere with my education.”",
        "Mark Twain",
        "“I have heard there are troubles of more than one kind. Some come from ahead and some come from behind. But I've bought a big bat. I'm all ready you see. Now my troubles are going to have troubles with me!”",
        "Dr. Seuss",
        "“If I had a flower for every time I thought of you...I could walk through my garden forever.”",
        "Alfred Tennyson",
        "“Some people never go crazy. What truly horrible lives they must lead.”",
        "Charles Bukowski",
        "“The trouble with having an open mind, of course, is that people will insist on coming along and trying to put things in it.”",
        "Terry Pratchett",
        "“Think left and think right and think low and think high. Oh, the thinks you can think up if only you try!”",
        "Dr. Seuss",
        "“What really knocks me out is a book that, when you're all done reading it, you wish the author that wrote it was a terrific friend of yours and you could call him up on the phone whenever you felt like it. That doesn't happen much, though.”",
        "J.D. Salinger",
        "“The reason I talk to myself is because I’m the only one whose answers I accept.”",
        "George Carlin",
        "“You may say I'm a dreamer, but I'm not the only one. I hope someday you'll join us. And the world will live as one.”",
        "John Lennon",
        "“I am free of all prejudice. I hate everyone equally. ”",
        "W.C. Fields",
        "“The question isn't who is going to let me; it's who is going to stop me.”",
        "Ayn Rand",
        "“′Classic′ - a book which people praise and don't read.”",
        "Mark Twain",
        "“Anyone who has never made a mistake has never tried anything new.”",
        "Albert Einstein",
        "“A lady's imagination is very rapid; it jumps from admiration to love, from love to matrimony in a moment.”",
        "Jane Austen",
        "“Remember, if the time should come when you have to make a choice between what is right and what is easy, remember what happened to a boy who was good, and kind, and brave, because he strayed across the path of Lord Voldemort. Remember Cedric Diggory.”",
        "J.K. Rowling",
        "“I declare after all there is no enjoyment like reading! How much sooner one tires of any thing than of a book! -- When I have a house of my own, I shall be miserable if I have not an excellent library.”",
        "Jane Austen",
        "“There are few people whom I really love, and still fewer of whom I think well. The more I see of the world, the more am I dissatisfied with it; and every day confirms my belief of the inconsistency of all human characters, and of the little dependence that can be placed on the appearance of merit or sense.”",
        "Jane Austen",
        "“Some day you will be old enough to start reading fairy tales again.”",
        "C.S. Lewis",
        "“We are not necessarily doubting that God will do the best for us; we are wondering how painful the best will turn out to be.”",
        "C.S. Lewis",
        "“The fear of death follows from the fear of life. A man who lives fully is prepared to die at any time.”",
        "Mark Twain",
        "“A lie can travel half way around the world while the truth is putting on its shoes.”",
        "Mark Twain",
        "“I believe in Christianity as I believe that the sun has risen: not only because I see it, but because by it I see everything else.”",
        "C.S. Lewis",
        "“The truth.\" Dumbledore sighed. \"It is a beautiful and terrible thing, and should therefore be treated with great caution.”",
        "J.K. Rowling",
        "“I'm the one that's got to die when it's time for me to die, so let me live my life the way I want to.”",
        "Jimi Hendrix",
        "“To die will be an awfully big adventure.”",
        "J.M. Barrie",
        "“It takes courage to grow up and become who you really are.”",
        "E.E. Cummings",
        "“But better to get hurt by the truth than comforted with a lie.”",
        "Khaled Hosseini",
        "“You never really understand a person until you consider things from his point of view... Until you climb inside of his skin and walk around in it.”",
        "Harper Lee",
        "“You have to write the book that wants to be written. And if the book will be too difficult for grown-ups, then you write it for children.”",
        "Madeleine L'Engle",
        "“Never tell the truth to people who are not worthy of it.”",
        "Mark Twain",
        "“A person's a person, no matter how small.”",
        "Dr. Seuss",
        "“... a mind needs books as a sword needs a whetstone, if it is to keep its edge.”",
        "George R.R. Martin"
    ]
}
<code>
import scrapy
#***** xpath ****#
# Titulo = //h1/a/text()
# Citas = //span[@class="text" and @itemprop="text"]/text()
# top ten tags = //div[contains(@class,"tags-box")]//span[@class="tag-item"]/a/text()
# Autor = //div[@class="quote"]//small[@class="author"]/text()
#para generar archivo con resultados en consola poner scrapy crawl quotes -o quotes.formato
#next page= //ul[@class="pager"]//li[@class="next"]/a/@href

class QuotesSpider(scrapy.Spider):
    """Spider that scrapes quotes.toscrape.com.

    ``parse`` emits the page title with the top tags. Quotes and authors of
    every page are accumulated through ``cb_kwargs`` and emitted as one item
    of "Author: ... Quotes: ..." strings when the last page is reached.

    Run ``scrapy crawl quotes -a top=3`` to keep only the first 3 tags.
    """

    # Class-level default only; generate_quotes_authot rebinds this name on
    # the instance instead of mutating this shared list.
    author_quote = []
    name = "quotes"
    start_urls = [
        "https://quotes.toscrape.com/"
    ]
    custom_settings = {
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        'CONCURRENT_REQUESTS': 24,
        'MEMUSAGE_LIMIT_MB': 2048,
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],
        'ROBOTSTXT_OBEY': True,
        'USER_AGENT': 'JuanRivano',
        'FEED_EXPORT_ENCODING': 'utf-8'
    }

    def generate_quotes_authot(self, authors, quotes):
        """Build one "Author: <a> Quotes: <q>" string per author/quote pair.

        The result is stored in ``self.author_quote`` and returned. Pairing
        stops at the shorter of the two lists.
        """
        # Assign a fresh list rather than appending to the class attribute,
        # which was shared by every instance and grew on each call.
        self.author_quote = [
            f"Author: {author} Quotes: {quote}"
            for author, quote in zip(authors, quotes)
        ]
        return self.author_quote

    def parse_only_quotes(self, response, **kwargs):
        """Collect quotes/authors of a follow-up page and keep paginating.

        ``kwargs`` holds the ``quotes`` and ``autor`` lists accumulated on
        the previous pages (passed through ``cb_kwargs``).

        Yields:
            A request for the next page, or on the last page a single
            ``{'quotes': [...]}`` item built by ``generate_quotes_authot``.
        """
        # Default to empty lists so a call without cb_kwargs does not fail
        # with UnboundLocalError.
        quotes = kwargs.get('quotes', [])
        autores = kwargs.get('autor', [])
        quotes.extend(response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall())
        autores.extend(response.xpath('//div[@class="quote"]//small[@class="author"]/text()').getall())
        next_page_buton = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_buton:
            # response.follow requests the given link; ``callback`` is the
            # method run on the response once that page has been fetched.
            yield response.follow(next_page_buton, callback=self.parse_only_quotes, cb_kwargs={'quotes': quotes, 'autor': autores})
        else:
            yield {
                'quotes': self.generate_quotes_authot(autores, quotes)
            }

    def parse(self, response):
        """Handle the first page: emit title and top tags, then start paging.

        Yields:
            ``{'title': ..., 'top_tag': [...]}`` first, then either a
            request for the next page or, when the site has a single page,
            the quotes item itself.
        """
        title = response.xpath("//h1/a/text()").get()
        quotes = response.xpath('//span[@class="text" and @itemprop="text"]/text()').getall()
        autor = response.xpath('//div[@class="quote"]//small[@class="author"]/text()').getall()
        top_tags = response.xpath("//div[contains(@class,'tags-box')]//span[@class='tag-item']/a/text()").getall()

        # Optional spider argument, e.g. ``scrapy crawl quotes -a top=3``.
        top = getattr(self, 'top', None)
        if top:
            # Spider arguments arrive as strings, hence the conversion.
            top = int(top)
            top_tags = top_tags[:top]

        yield {
            'title': title,
            'top_tag': top_tags
        }

        next_page_buton = response.xpath('//ul[@class="pager"]//li[@class="next"]/a/@href').get()
        if next_page_buton:
            # Pass this page's quotes and authors along to the next callback.
            yield response.follow(next_page_buton, callback=self.parse_only_quotes, cb_kwargs={'quotes': quotes, 'autor': autor})
        else:
            # Single-page site: emit the quotes gathered above instead of
            # dropping them.
            yield {
                'quotes': self.generate_quotes_authot(autor, quotes)
            }



import scrapy

#titulo = //h1/a/text()
#citas = //span[@class = "text" and @itemprop = "text"]/text()
#top ten tags = response.xpath('//div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()').getall()
#boton next = response.xpath('//ul[@class = "pager"]//li[@class = "next"]/a/@href').get()
#author =  response.xpath('//div[@class ="quote"]//small[@class="author"]/text()').getall()

class QuotesSpider(scrapy.Spider):
    """Spider that scrapes quotes.toscrape.com.

    ``parse`` emits the page title with the top tags. Quotes and authors of
    every page are accumulated through ``cb_kwargs`` and emitted as one item
    of ``(quote, author)`` pairs when the last page is reached.

    Pass ``-a top=N`` on the command line to keep only the first N tags.
    """

    name = 'quotes'
    start_urls = [
        # Fixed: the scheme contained a stray space ('htt p://'), which is
        # not a valid URL.
        'http://quotes.toscrape.com/'
    ]

    custom_settings = {
        'FEED_URI': 'quotes.json',
        'FEED_FORMAT': 'json',
        # Fixed key: it was misspelled 'CURRENT_REQUESTS', so the
        # concurrency limit was never applied.
        'CONCURRENT_REQUESTS': 24,
        'MEMUSAGE_LIMIT_MB': 2048,
        'MEMUSAGE_NOTIFY_MAIL': ['[email protected]'],
        'ROBOTSTXT_OBEY': True,
        'USER_AGENT': 'PepitoPerez',
        'FEED_EXPORT_ENCODING': 'utf-8'
    }

    def parse_only_quotes(self, response, **kwargs):
        """Collect quotes/authors of a follow-up page and keep paginating.

        ``kwargs`` holds the ``quotes`` and ``author`` lists accumulated on
        the previous pages (passed through ``cb_kwargs``).

        Yields:
            A request for the next page, or on the last page a single
            ``{'quotes': [(quote, author), ...]}`` item.
        """
        # Default to empty lists so a call without cb_kwargs does not fail
        # with UnboundLocalError.
        quotes = kwargs.get('quotes', [])
        author = kwargs.get('author', [])
        quotes.extend(response.xpath('//span[@class = "text" and @itemprop = "text"]/text()').getall())
        author.extend(response.xpath('//div[@class ="quote"]//small[@class="author"]/text()').getall())

        next_page_button_link = response.xpath('//ul[@class = "pager"]//li[@class = "next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(next_page_button_link, callback=self.parse_only_quotes, cb_kwargs={'quotes': quotes, 'author': author})
        else:
            yield {
                'quotes': list(zip(quotes, author))
            }

    def parse(self, response):
        """Handle the first page: emit title and top tags, then start paging.

        Yields:
            ``{'title': ..., 'top_quotes': [...]}`` first (the
            ``top_quotes`` key holds the tag names), then either a request
            for the next page or, when there is none, the quotes item.
        """
        title = response.xpath('//h1/a/text()').get()
        quotes = response.xpath('//span[@class = "text" and @itemprop = "text"]/text()').getall()
        top_tags = response.xpath('//div[contains(@class, "tags-box")]//span[@class="tag-item"]/a/text()').getall()
        author = response.xpath('//div[@class ="quote"]//small[@class="author"]/text()').getall()

        # If this spider run has an attribute named ``top`` its value is
        # stored in ``top``; otherwise ``top`` is None.
        top = getattr(self, 'top', None)
        if top:
            # Spider arguments arrive as strings, hence the conversion.
            top = int(top)
            top_tags = top_tags[:top]

        yield {
            "title": title,
            "top_quotes": top_tags
        }

        next_page_button_link = response.xpath('//ul[@class = "pager"]//li[@class = "next"]/a/@href').get()
        if next_page_button_link:
            yield response.follow(next_page_button_link, callback=self.parse_only_quotes, cb_kwargs={'quotes': quotes, 'author': author})
        else:
            # Single-page site: emit the pairs gathered on this page.
            yield {
                'quotes': list(zip(quotes, author))
            }
[
{"title": "Quotes to Scrape", "top_tags": ["love", "inspirational", "life", "humor", "books"]},
{"quote": "“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”", "author": "Albert Einstein"},
{"quote": "“It is our choices, Harry, that show what we truly are, far more than our abilities.”", "author": "J.K. Rowling"},
{"quote": "“This life is what you make it. No matter what, you're going to mess up sometimes, it's a universal truth. But the good part is you get to decide how you're going to mess it up. Girls will be your friends - they'll act like it anyway. But just remember, some come, some go. The ones that stay with you through everything - they're your true best friends. Don't let go of them. Also remember, sisters make the best friends in the world. As for lovers, well, they'll come and go too. And baby, I hate to say it, most of them - actually pretty much all of them are going to break your heart, but you can't give up because if you give up, you'll never find your soulmate. You'll never find that half who makes you whole and that goes for everything. Just because you fail once, doesn't mean you're gonna fail at everything. Keep trying, hold on, and always, always, always believe in yourself, because if you don't, then who will, sweetie? So keep your head high, keep your chin up, and most importantly, keep smiling, because life's a beautiful thing and there's so much to smile about.”", "author": "Marilyn Monroe"},
{"quote": "“It takes a great deal of bravery to stand up to our enemies, but just as much to stand up to our friends.”", "author": "J.K. Rowling"},
{"quote": "“I love you without knowing how, or when, or from where. I love you simply, without problems or pride: I love you in this way because I do not know any other way of loving but this, in which there is no I or you, so intimate that your hand upon my chest is my hand, so intimate that when I fall asleep your eyes close.”", "author": "Pablo Neruda"},
{"quote": "“For every minute you are angry you lose sixty seconds of happiness.”", "author": "Ralph Waldo Emerson"},
{"quote": "“The more that you read, the more things you will know. The more that you learn, the more places you'll go.”", "author": "Dr. Seuss"},
{"quote": "“Of course it is happening inside your head, Harry, but why on earth should that mean that it is not real?”", "author": "J.K. Rowling"},
{"quote": "“A reader lives a thousand lives before he dies, said Jojen. The man who never reads lives only one.”", "author": "George R.R. Martin"},
{"quote": "“You can never get a cup of tea large enough or a book long enough to suit me.”", "author": "C.S. Lewis"},
{"quote": "“There is nothing I would not do for those who are really my friends. I have no notion of loving people by halves, it is not my nature.”", "author": "Jane Austen"},
{"quote": "“Do one thing every day that scares you.”", "author": "Eleanor Roosevelt"},
{"quote": "“That's the problem with drinking, I thought, as I poured myself a drink. If something bad happens you drink in an attempt to forget; if something good happens you drink in order to celebrate; and if nothing happens you drink to make something happen.”", "author": "Charles Bukowski"},
{"quote": "“You don’t forget the face of the person who was your last hope.”", "author": "Suzanne Collins"},
{"quote": "“If I had a flower for every time I thought of you...I could walk through my garden forever.”", "author": "Alfred Tennyson"},
{"quote": "“Some people never go crazy. What truly horrible lives they must lead.”", "author": "Charles Bukowski"},
{"quote": "“Anyone who has never made a mistake has never tried anything new.”", "author": "Albert Einstein"},
{"quote": "“A lady's imagination is very rapid; it jumps from admiration to love, from love to matrimony in a moment.”", "author": "Jane Austen"},
{"quote": "“The truth.\" Dumbledore sighed. \"It is a beautiful and terrible thing, and should therefore be treated with great caution.”", "author": "J.K. Rowling"},
{"quote": "“I'm the one that's got to die when it's time for me to die, so let me live my life the way I want to.”", "author": "Jimi Hendrix"}
]

interesante