# -*- coding:utf-8 -*-
import requests
import re
import time
import json
import random
import datetime
from wordpress_xmlrpc import Client, WordPressPost
from wordpress_xmlrpc.methods.posts import GetPosts, NewPost
from wordpress_xmlrpc.methods.users import GetUserInfo
# Link to the WeChat official-account article to scrape. The script's
# __main__ section passes this value to get_article().
# NOTE(review): the query string looks like a placeholder; substitute a real
# article link before running.
url = 'http://mp.weixin.qq.com/s?xxxxxxxxxxx'
# Scrape a WeChat official-account article.
def get_article(url, timeout=10):
    """Download an article page and extract its title and plain-text body.

    Args:
        url: Address of the article page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout a stalled connection blocks forever.

    Returns:
        A ``(title, content)`` tuple of strings, or ``None`` when the server
        does not answer with HTTP 200 or the expected markup is not found.
        A short message is printed in either failure case.

    Raises:
        requests.RequestException: Network errors (including timeouts) are
            propagated unchanged.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print('请求失败')
        return None

    html = response.text
    title_match = re.search(r'<title>(.*?)</title>', html)
    # NOTE: the non-greedy pattern ends at the first ``</div>``, so text that
    # follows a nested ``<div>`` inside the article body is not captured.
    body_match = re.search(
        r'<div class="rich_media_content " id="js_content">(.*?)</div>',
        html,
        re.S,
    )
    if title_match is None or body_match is None:
        # Report missing markup instead of failing with an IndexError.
        print('解析失败')
        return None

    title = title_match.group(1)
    # Strip every remaining tag, keeping only the text between them.
    text = re.sub(r'<.*?>', '', body_match.group(1))
    # Delete ASCII spaces, no-break spaces, newlines and ideographic spaces
    # in a single pass.
    text = text.translate(str.maketrans('', '', ' \xa0\n\u3000'))
    return title, text
# Connect to WordPress: module-level XML-RPC client that post_article() uses
# to submit new posts.
# NOTE(review): the endpoint, 'username' and 'password' look like
# placeholders; supply real values (preferably from configuration rather
# than source) before running. The endpoint as written uses plain http.
wp = Client('http://www.example.com/xmlrpc.php', 'username', 'password')
# Publish an article.
def post_article(title, content):
    """Create a WordPress post from *title* and *content* and publish it.

    The post gets the tags 'Python' and '爬虫' and the category 'Python',
    is submitted through the module-level ``wp`` client, and a confirmation
    line is printed once the call returns.
    """
    taxonomy = {
        'post_tag': ['Python', '爬虫'],
        'category': ['Python'],
    }

    article = WordPressPost()
    article.title, article.content = title, content
    article.post_status = 'publish'
    article.terms_names = taxonomy

    request = NewPost(article)
    wp.call(request)
    print('发布成功!')
if __name__ == '__main__':
    # get_article() returns None when the download fails, so check before
    # unpacking; otherwise the failure surfaces as an opaque TypeError.
    article = get_article(url)
    if article is None:
        # Nothing to publish; keep a non-zero exit status for the failure.
        raise SystemExit(1)
    title, content = article
    post_article(title, content)
# 爬取微信公众号文章并发布到wordpress
# 版权声明:本文采用知识共享 署名4.0国际许可协议 [BY-NC-SA] 进行授权
# 文章名称:《爬取微信公众号文章并发布到wordpress》
# 文章链接:https://www.gebizhan.com/1750.html
# 本站资源仅供个人学习交流,请于下载后24小时内删除,不允许用于商业用途,否则法律问题自行承担。
# 文章名称:《爬取微信公众号文章并发布到wordpress》
# 文章链接:https://www.gebizhan.com/1750.html
# 本站资源仅供个人学习交流,请于下载后24小时内删除,不允许用于商业用途,否则法律问题自行承担。