extract info function

# helper function to extract information from a given article

def ArticleDataExtractor(some_url):
    """Pull out the key information from the article at ``some_url``.

    The article is downloaded and parsed with ``newspaper.Article``; its
    ``nlp()`` step is then run before the keywords and summary are read.

    Args:
        some_url: URL of the article to process.

    Returns:
        A dict with the keys ``'url'``, ``'authors'``, ``'pubDate'``,
        ``'title'``, ``'text'``, ``'keywords'`` and ``'summary'``.
        ``'pubDate'`` is always ``str(article.publish_date)``, so if the
        publish date is ``None`` the stored value is the string ``'None'``.
    """
    # Imported locally, so the dependency is only needed when this is called.
    from newspaper import Article

    page = Article(some_url)
    page.download()
    page.parse()

    # Fields read straight after parsing, before the NLP step runs.
    record = {
        'url': some_url,
        'authors': page.authors,
        'pubDate': str(page.publish_date),
        'title': page.title,
        'text': page.text,
    }

    # Keywords and summary are read only after nlp() has been called.
    page.nlp()
    record.update(keywords=page.keywords, summary=page.summary)

    return record

Send a Comment

Your email address will not be published.