micolog插件-SaveRemoteImages自动保存远程图片 v0.0.1 九月 27th, 2010
给micolog写的一个插件,用于自动保存远程图片。
实现了:
- 自动解析文章中图片,并异步存储到gae的storage中
- 可以自定义ignore_domain_list,用于列出那些比较稳定的图床网站;来自这些网站的图片就不用保存到本地了
还不太完善,经常会提示保存出错,而且cpu占用比较高。会慢慢改进的。
goto micolog plugin download page
贴上代码,主要用了异步request,喜欢的同学随意使用,代码比较烂就是了
#!/usr/bin/env python
#coding=utf-8
'''
Created on Sep 19, 2010
Automatically save remote images to local gae storage
@author: wade (wade.beyond@gmail.com)
'''
import logging
import re
import sys
import uuid

from google.appengine.api import urlfetch

from micolog_plugin import Plugin, OptionSet
from model import Media
def _finish_fetch_remote_image(entry):
entry.srip_remote_images -= 1
if entry.srip_remote_images == 0:
entry.put()
logging.info('Finish fetch all remote images for entry(title=%s)' % entry.title)
def _handle_failed_fetch_remote_image(entry, dummy_image_url, remote_image_url, error):
    '''Fall back to the original remote URL after an image fetch failed.

    entry            -- object whose ``content`` currently holds the placeholder
    dummy_image_url  -- placeholder text that stands in for the remote URL
    remote_image_url -- original URL; written back in place of the placeholder
    error            -- exception (or any object) describing the failure;
                        it is only logged
    '''
    # A failed fetch is reported as a warning rather than routine info.
    # The values are passed as logging arguments instead of being
    # %-formatted here, so a title/error pair that cannot be combined into
    # one string (unicode vs. non-ASCII bytes on Python 2) cannot raise in
    # this function and skip the restore below.
    logging.warning("Failed to fetch remote image(%s) for entry(%s).\nerror: %s",
                    remote_image_url, entry.title, error, exc_info=True)
    entry.content = entry.content.replace(dummy_image_url, remote_image_url)
    _finish_fetch_remote_image(entry)
def _handle_succeed_fetch_remote_image(response, entry, remote_image_url, local_image_name):
    '''Store a fetched image as a ``Media`` record and link the entry to it.

    response         -- fetch result; ``headers`` and ``content`` are read
    entry            -- object whose ``content`` holds the placeholder
    remote_image_url -- URL the image came from (used for logging only)
    local_image_name -- placeholder text in the content; also used as the
                        ``name`` of the stored ``Media``
    '''
    logging.info('Succeed get remote image in _handle_succeed_fetch_remote_image remote_image_url=%s, local_image_name=%s' %
                 (remote_image_url, local_image_name))
    # A server may omit Content-Type; indexing the header would raise
    # KeyError and abort the whole save, so fall back to a generic type.
    # NOTE(review): assumes Media accepts any MIME string for mtype - confirm.
    mtype = response.headers.get('Content-Type') or 'application/octet-stream'
    # save image to gae storage
    media = Media(name=local_image_name, mtype=mtype, bits=response.content)
    media.put()
    saved_local_image_url = '/media/%s' % str(media.key())
    entry.content = entry.content.replace(local_image_name, saved_local_image_url)
    _finish_fetch_remote_image(entry)
def _handle_async_fetch_remote_image_result(rpc, entry, remote_image_url, local_image_name):
    '''Dispatch a completed fetch RPC to the success or failure handler.

    rpc              -- the fetch RPC whose result is ready
    entry            -- object whose ``content`` holds the placeholder
    remote_image_url -- URL that was requested
    local_image_name -- placeholder text that replaced the URL in the content

    A fetch that raises, or that answers with a status other than 200, is
    routed to ``_handle_failed_fetch_remote_image`` so the original URL is
    put back into the content.
    '''
    logging.info('Response remote image in _handle_async_fetch_remote_image_result remote_image_url=%s, local_image_name=%s' %
                 (remote_image_url, local_image_name))
    try:
        response = rpc.get_result()
    except Exception:
        # get_result() re-raises whatever went wrong during the fetch.
        # Left unhandled it would escape through rpc.wait(), leaving the
        # placeholder in the content and the pending counter stuck.
        # sys.exc_info() is used instead of "except ... as" so the syntax
        # stays valid on old Python 2 runtimes as well.
        _handle_failed_fetch_remote_image(entry, local_image_name, remote_image_url,
                                          sys.exc_info()[1])
        return
    if response.status_code == 200:
        _handle_succeed_fetch_remote_image(response, entry, remote_image_url, local_image_name)
    else:
        # Only a short repr() of the body goes into the message: the body may
        # be large or contain bytes that cannot be mixed into a text log line.
        _handle_failed_fetch_remote_image(entry, local_image_name, remote_image_url,
                                          Exception('code=%s content=%r' %
                                                    (response.status_code, response.content[:200])))
def _create_async_rpc_callback(rpc, entry, remote_image_url, will_save_as_local_image_name):
return lambda:_handle_async_fetch_remote_image_result(rpc, entry,
remote_image_url, will_save_as_local_image_name)
def _async_fetch_remote_images(entry, remote_image_urls):
if not remote_image_urls:
logging.info('No remote images in enrty(title=%s)' % entry.title)
return
logging.info('Remote images(cnt=%s) in entry(title=%s)' %
(len(remote_image_urls), entry.title))
logging.info('Remote image urls: \n%s' % '\n'.join(remote_image_urls))
setattr(entry, 'srip_remote_images', len(remote_image_urls))
image_name_prefix = entry.slug
if not image_name_prefix:
image_name_prefix = uuid.uuid4()
rpcs = []
for remote_image_url, index in \
zip(remote_image_urls, xrange(len(remote_image_urls))):
will_save_as_local_image_name = '%s-%s' % (image_name_prefix, index)
entry.content = entry.content.replace(remote_image_url, will_save_as_local_image_name)
try:
#_remote_image_url = copy.copy(remote_image_url),
#_will_save_as_local_image_name = copy.copy(will_save_as_local_image_name)
rpc = urlfetch.create_rpc(deadline=10)
rpc.callback = _create_async_rpc_callback(rpc, entry,
remote_image_url, will_save_as_local_image_name)
urlfetch.make_fetch_call(rpc, remote_image_url)
rpcs.append(rpc)
except Exception, e:
_handle_failed_fetch_remote_image(entry, will_save_as_local_image_name,
remote_image_url, e)
for rpc in rpcs:
rpc.wait()
class SaveRemoteImagesPlugin(Plugin):
    '''Micolog plugin that copies remote images of a post into local storage.

    The plugin registers ``handle`` for the "save_post" action. Images whose
    URL starts with one of the configured "ignore" domains are left alone.
    Each configured domain is used as a regular-expression alternative
    anchored at the start of the URL. The list is stored under the
    "srip_ignore_domains" option.
    '''

    def __init__(self):
        Plugin.__init__(self, __file__)
        self.blog = None
        self.author = "wade"
        self.authoruri = "http://i-and-world.appspot.com"
        self.uri = "http://i-and-world.appspot.com"
        self.description = """Automatically save remote images to local gae storage, it's useful when u share other's articles."""
        self.name = "SaveRemoteImages"
        self.version = "0.0.1"
        # The dots are escaped so that only a literal "../media" prefix is
        # treated as a local media link; unescaped they match any two
        # characters, and detect_remote_images() would then chop two
        # characters off an unrelated URL.
        self.local_media_image_regex = re.compile(r"^\.\./media", re.I)
        self.remote_domain_image_regex = re.compile(r"^http", re.I)
        self.all_images_regex = re.compile(r"<img[^>]*? src=[\'\"]?\s*([^\s\'\"]+?)[\'\"\s][^>]*>", re.I)
        self.register_action("save_post", self.handle)
        self.ignore_image_domains = OptionSet.getValue("srip_ignore_domains", [])
        self.ignore_images_regex = self._compile_ignore_regex(self.ignore_image_domains)

    @staticmethod
    def _compile_ignore_regex(domains):
        '''Compile the "URL starts with an ignored domain" pattern.

        With no usable domains the result is a pattern that never matches.
        Joining an empty list would produce '^()', which matches every URL
        and would make the plugin skip all images.
        '''
        patterns = [domain for domain in domains if domain]
        if not patterns:
            return re.compile(r'(?!)')
        return re.compile(r'^(%s)' % '|'.join(patterns), re.I)

    def _reset_ignore_image_domains(self, ignore_domains=None):
        '''Replace the ignore list, always including the blog's own base URL.

        ignore_domains -- iterable of domain strings, or None/empty to reload
                          the list from the "srip_ignore_domains" option.

        Blank entries are dropped. The resulting list is compiled into
        ``ignore_images_regex`` and written back to the option store.
        '''
        if ignore_domains:
            self.ignore_image_domains = [
                domain.strip() for domain in ignore_domains
                if domain.strip()
            ]
        else:
            self.ignore_image_domains = OptionSet.getValue("srip_ignore_domains", [])
        if not self.ignore_image_domains:
            self.ignore_image_domains = [self.blog.baseurl]
        elif unicode(self.blog.baseurl) not in self.ignore_image_domains:
            self.ignore_image_domains.insert(0, self.blog.baseurl)
        self.ignore_images_regex = self._compile_ignore_regex(self.ignore_image_domains)
        logging.info('self.ignore_image_domains = %s' % self.ignore_images_regex.pattern)
        OptionSet.setValue("srip_ignore_domains", self.ignore_image_domains)

    def get(self, page):
        '''Return the HTML of the settings form listing the ignore domains.'''
        if not self.ignore_image_domains:
            self._reset_ignore_image_domains()
        return '''<h3>Setup for your 'Save Remote Images Plugin'</h3>
<form action="" method="post">
<h3>ignore image domains</h3>
<span style="color:red">images from these domains will not be saved to gae.</span>
<b>one domain per line, the domain url should be with http/https prefix.</b><br/>
example:
<ul>%s<br/>
http://www.google.com<br/>
http://www.baidu.com</ul>
<textarea cols=50 rows=10 name='ignore_domains'>%s</textarea>
<br>
<input type="submit" value="submit">
</form>
Powered By <a href="http://www.i-and-world.com" target="_blank">I and the world!</a>
''' % (self.blog.baseurl, '\n'.join(self.ignore_image_domains))

    def post(self, page):
        '''Store the submitted ignore domains and re-render the form.'''
        submitted = page.param("ignore_domains") or ''
        # splitlines() accepts "\n" as well as "\r\n". Splitting on "\r\n"
        # only would merge all domains into one entry for clients that
        # send bare newlines.
        self._reset_ignore_image_domains(submitted.splitlines())
        return self.get(page)

    def handle(self, entry, *args, **kwargs):
        '''"save_post" action: fetch remote images or save rewritten links.'''
        # Mirror get(): make sure the ignore list (at least the blog's own
        # base URL) is initialised even if the settings page was never opened.
        # NOTE(review): assumes self.blog has been assigned by the time this
        # action fires; the None check guards the case where it has not.
        if not self.ignore_image_domains and self.blog is not None:
            self._reset_ignore_image_domains()
        remote_image_urls, has_local_media_urls = self.detect_remote_images(entry)
        if remote_image_urls:
            _async_fetch_remote_images(entry, remote_image_urls)
        elif has_local_media_urls:
            # Only "../media" links were rewritten; persist that change.
            entry.put()

    def detect_remote_images(self, entry):
        '''Classify the <img> URLs found in ``entry.content``.

        "../media..." URLs are rewritten in the content to drop the leading
        "..". URLs that start with "http" and do not match the ignore
        pattern are collected.

        Returns ``(remote_image_urls, has_local_media_urls)``: the collected
        URLs, each once and in order of first appearance, and whether any
        "../media" link was rewritten.
        '''
        remote_image_urls = []
        has_local_media_urls = False
        seen = set()
        # Iterate in document order (not over a set) so the index assigned
        # to each image is the same from one run to the next.
        for image_url in self.all_images_regex.findall(entry.content):
            if image_url in seen:
                continue
            seen.add(image_url)
            if self.local_media_image_regex.match(image_url):
                has_local_media_urls = True
                entry.content = entry.content.replace(image_url, image_url[2:])
            elif (self.remote_domain_image_regex.match(image_url)
                  and not self.ignore_images_regex.match(image_url)):
                remote_image_urls.append(image_url)
        return remote_image_urls, has_local_media_urls
# Lowercase module-level alias for the plugin class.
# NOTE(review): presumably the name micolog's plugin loader looks up - confirm.
save_remote_images = SaveRemoteImagesPlugin
是放在micolog根目录下吧?貌似没起作用阿?
你这验证码老要输两次……
哦,是放到plugins目录下阿?
just test the comment function
代码写的很漂亮……支持下
请问对于这样的地址该如何过滤?
支持通配符*吗?写成 http://*.static.flickr.com ??
http://farm6.static.flickr.com/5289/5359666571_4e43b3ac80_b.jpg
谢谢!
不支持通配符,但是支持正则表达式。
所以对于这个链接可以使用:http://[^.]+\.static\.flickr\.com/ (正则表达式里的点号要用反斜杠转义,否则会匹配任意字符)
GAE不是说空间只有500M的吗,那要是快存满了怎么解决?
GAE对文件的数量和容量是有限制的,最好使用外部相册,imgur.com这个支持API,可以无限上传图片,代码小白,不会改代码,就指望你啦,哈哈!
There are some fascinating time limits in this article however I don’t know if I see all of them middle to heart. There may be some validity however I will take hold opinion until I look into it daitem further. Good article , thanks and we would like more! Added to FeedBurner as effectively