U
    h)                     @   sL   d dl mZmZmZ d dlmZ d dlmZ d dlm	Z	 G dd deZ
dS )    )IteratorLiteralOptional)
BaseLoader)Document)get_from_envc                   @   sL   e Zd ZdZddddeee ed ee dddZe	e
 d	d
dZdS )SpiderLoaderzLoad web pages as Documents using Spider AI.

    Must have the Python package `spider-client` installed and a Spider API key.
    See https://spider.cloud for more.
    Nscrape)api_keymodeparamsr	   crawl)urlr
   r   r   c                C   s   |dkrddd}zddl m} W n tk
r>   tdY nX |dkrXtd	| d
|pdtdd}||d| _ || _|| _|| _dS )a  Initialize with API key and URL.

        Args:
            url: The URL to be processed.
            api_key: The Spider API key. If not specified, will be read from env
            var `SPIDER_API_KEY`.
            mode: The mode to run the loader in. Default is "scrape".
                 Options include "scrape" (single page) and "crawl" (with deeper
                 crawling following subpages).
            params: Additional parameters for the Spider API.
        NZmarkdownT)Zreturn_formatmetadatar   )SpiderzB`spider` package not found, please run `pip install spider-client`r   zUnrecognized mode 'z%'. Expected one of 'scrape', 'crawl'.r
   ZSPIDER_API_KEY)r
   )spiderr   ImportError
ValueErrorr   r   r   r   )selfr   r
   r   r   r    r   O/tmp/pip-unpacked-wheel-9gdii04g/langchain_community/document_loaders/spider.py__init__   s&    

zSpiderLoader.__init__)returnc                 c   s   g }| j dkr2| jj| j| jd}|r^|| n,| j dkr^| jj| j| jd}|r^|| |D ]|}| j dkr|d dd}|d di }|dk	rt	||d	V  | j dkrb|dd}|di }|dk	rbt	||d	V  qbdS )
z+Load documents based on the specified mode.r	   )r   r   r   content r   N)page_contentr   )
r   r   Z
scrape_urlr   r   appendZ	crawl_urlextendgetr   )r   Zspider_docsresponsedocr   r   r   r   r   	lazy_load:   s.    




zSpiderLoader.lazy_load)__name__
__module____qualname____doc__strr   r   dictr   r   r   r"   r   r   r   r   r      s   
+r   N)typingr   r   r   Zlangchain_core.document_loadersr   Zlangchain_core.documentsr   Zlangchain_core.utilsr   r   r   r   r   r   <module>   s   