U
    h                     @   sf   d dl Z d dlmZmZmZmZmZ d dlmZ d dl	m
Z
 d dlmZ e eZG dd de
ZdS )    N)AnyIteratorListOptionalSequence)Document)
BaseLoader)NewsURLLoaderc                   @   sp   e Zd ZdZdeee  ee eeeddddZ	e
e dd	d
Zeee dddZee dddZdS )RSSFeedLoaderaQ  Load news articles from `RSS` feeds using `Unstructured`.

    Args:
        urls: URLs for RSS feeds to load. Each articles in the feed is loaded into its own document.
        opml: OPML file to load feed urls from. Only one of urls or opml should be provided.  The value
        can be a URL string, or OPML markup contents as byte or string.
        continue_on_failure: If True, continue loading documents even if
            loading fails for a particular URL.
        show_progress_bar: If True, use tqdm to show a loading progress bar. Requires
            tqdm to be installed, ``pip install tqdm``.
        **newsloader_kwargs: Any additional named arguments to pass to
            NewsURLLoader.

    Example:
        .. code-block:: python

            from langchain_community.document_loaders import RSSFeedLoader

            loader = RSSFeedLoader(
                urls=["<url-1>", "<url-2>"],
            )
            docs = loader.load()

    The loader uses feedparser to parse RSS feeds.  The feedparser library is not installed by default so you should
    install it if using this loader:
    https://pythonhosted.org/feedparser/

    If you use OPML, you should also install listparser:
    https://pythonhosted.org/listparser/

    Finally, newspaper is used to process each article:
    https://newspaper.readthedocs.io/en/latest/
    NTF)urlsopmlcontinue_on_failureshow_progress_barnewsloader_kwargsreturnc                 K   s:   |dk|dkkrt d|| _|| _|| _|| _|| _dS )zInitialize with urls or OPML.Nz;Provide either the urls or the opml argument, but not both.)
ValueErrorr   r   r   r   r   )selfr   r   r   r   r    r   L/tmp/pip-unpacked-wheel-9gdii04g/langchain_community/document_loaders/rss.py__init__/   s    	zRSSFeedLoader.__init__)r   c              
   C   s\   |   }| jrTzddlm} W n, tk
rJ } ztd|W 5 d }~X Y nX ||}t|S )Nr   )tqdmzPackage tqdm must be installed if show_progress_bar=True. Please install with 'pip install tqdm' or set show_progress_bar=False.)	lazy_loadr   r   ImportErrorlist)r   iterr   er   r   r   loadD   s    zRSSFeedLoader.loadc              
   C   sb   | j r| j S zdd l}W n, tk
rD } ztd|W 5 d }~X Y nX || j}dd |jD S )Nr   zPackage listparser must be installed if the opml arg is used. Please install with 'pip install listparser' or use the urls arg instead.c                 S   s   g | ]
}|j qS r   )url).0feedr   r   r   
<listcomp>_   s     z+RSSFeedLoader._get_urls.<locals>.<listcomp>)r   
listparserr   parser   Zfeeds)r   r!   r   Zrssr   r   r   	_get_urlsR   s    zRSSFeedLoader._get_urlsc                 c   sR  zdd l }W n tk
r(   tdY nX | jD ]}z0||}t|ddrdtd| d|j W nL tk
r } z.| jrt	
d| d|  W Y q0n|W 5 d }~X Y nX zD|jD ]8}tf d|jgi| j}| d }||jd< |V  qW q0 tk
rJ } z2| jr6t	
d	|j d|  W Y q0n|W 5 d }~X Y q0X q0d S )
Nr   zMfeedparser package not found, please install it with `pip install feedparser`ZbozoFzError fetching z, exception: r   r   zError processing entry )
feedparserr   r#   r"   getattrr   Zbozo_exception	Exceptionr   loggererrorentriesr	   linkr   r   metadata)r   r$   r   r   r   entryloaderarticler   r   r   r   a   sB    





zRSSFeedLoader.lazy_load)NNTF)__name__
__module____qualname____doc__r   r   strboolr   r   r   r   r   propertyr#   r   r   r   r   r   r   r
      s"   $    
r
   )loggingtypingr   r   r   r   r   Zlangchain_core.documentsr   Z)langchain_community.document_loaders.baser   Z)langchain_community.document_loaders.newsr	   	getLoggerr/   r'   r
   r   r   r   r   <module>   s   
