Skip to content
This repository was archived by the owner on Jan 25, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
*.pyc
*.pyo
dist/*
requirements.txt
build/*
57 changes: 31 additions & 26 deletions PyOpenGraph/PyOpenGraph.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#coding: utf-8

#Copyright (c) 2010 Gerson Minichiello
#
Expand All @@ -21,7 +21,10 @@
#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#THE SOFTWARE.

import re
import rdfadict
import urllib2
from bs4 import BeautifulSoup

OPENGRAPH_NAMESPACES = [
"http://opengraphprotocol.org/schema",
Expand All @@ -30,28 +33,30 @@
]

class PyOpenGraph(object):
    """Open Graph metadata extracted from a page's RDFa markup.

    After construction, ``metadata`` is a dict mapping each Open Graph
    property key, with the matching namespace prefix removed, to the first
    value the parser reported for it.
    """

    def __init__(self, url=None, xml=None):
        """Parse the document and populate ``self.metadata``.

        url -- address of the page. It is fetched when ``xml`` is empty, and
               it is also the key used to select this page's entry from the
               parser result.
        xml -- optional markup to parse instead of fetching ``url``.
        """
        parser = rdfadict.RdfaParser()
        if not xml:
            result = parser.parse_url(url)
        else:
            result = parser.parse_string(xml, url)
        data = result[url]
        self.metadata = self.get_properties(data)

    def get_properties(self, data, namespaces=None):
        """Return the Open Graph entries of ``data`` with the namespace removed.

        data       -- mapping of property URI to a sequence of values.
        namespaces -- optional iterable of namespace prefixes to recognise;
                      defaults to the module-level OPENGRAPH_NAMESPACES.

        For every key that starts with one of the namespaces and has at
        least one value, the result maps the remainder of the key to the
        first value. Keys without values, or outside every namespace, are
        skipped.
        """
        if namespaces is None:
            namespaces = OPENGRAPH_NAMESPACES
        content = {}
        # items() works on both Python 2 and Python 3 mappings.
        for key, values in data.items():
            if not values:
                continue
            for ns in namespaces:
                if key.startswith(ns):
                    # Slice off the prefix only; str.replace would also
                    # delete any later occurrence of the namespace text.
                    content[key[len(ns):]] = values[0]
        return content

    def __str__(self):
        """Return the page title, or an empty string when none was found."""
        return self.metadata.get('title', '')

if __name__ == '__main__':
    # Usage example: fetch a product page and dump its Open Graph metadata.
    og = PyOpenGraph('http://www.zappos.com/timberland-pro-titan-safety-toe-oxford')
    # The parenthesised form is valid both as a Python 2 print statement and
    # as a Python 3 function call, and prints the same text in each.
    print(og.metadata)

def __init__(self, url=None, xml=None, prefix=True):
    """Load the Open Graph metadata for ``url`` into ``self.metadata``.

    url    -- page address; also the key used to select this page's entry
              from the parse result.
    xml    -- optional markup handed to the RDFa parser instead of fetching
              ``url``; only consulted when ``prefix`` is true.
    prefix -- when true the page is parsed with rdfadict; when false the
              page is fetched and scanned by ``_parse_web`` instead.
    """
    if not prefix:
        parsed = self._parse_web(url)
    else:
        rdfa = rdfadict.RdfaParser()
        if xml:
            parsed = rdfa.parse_string(xml, url)
        else:
            parsed = rdfa.parse_url(url)
    self.metadata = self.get_properties(parsed[url])

def get_properties(self, data, namespaces=None):
    """Return the Open Graph entries of ``data`` with the namespace removed.

    data       -- mapping of property URI to a sequence of values.
    namespaces -- optional iterable of namespace prefixes to recognise;
                  defaults to the module-level OPENGRAPH_NAMESPACES.

    For every key that starts with one of the namespaces and has at least
    one value, the result maps the remainder of the key to the first value.
    Keys without values, or outside every namespace, are skipped.
    """
    if namespaces is None:
        namespaces = OPENGRAPH_NAMESPACES
    content = {}
    # items() works on both Python 2 and Python 3 mappings.
    for key, values in data.items():
        if not values:
            continue
        for ns in namespaces:
            if key.startswith(ns):
                # Slice off the prefix only; str.replace would also delete
                # any later occurrence of the namespace text in the key.
                content[key[len(ns):]] = values[0]
    return content

def _parse_web(self, url):
    """Fetch ``url`` and collect its ``og:*`` <meta> tags with BeautifulSoup.

    Returns ``{url: {property_uri: [content, ...]}}``, where each
    ``property_uri`` is the first entry of OPENGRAPH_NAMESPACES joined by
    "/" to the property name with its leading ``og:`` removed. Values are
    kept in document order, so a consumer that takes the first element gets
    the first tag on the page.
    """
    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()

    soup = BeautifulSoup(html)
    content = {}
    # The pattern is anchored because BeautifulSoup applies it with search();
    # an unanchored 'og:' would also match properties such as "blog:title".
    for tag in soup.find_all('meta', {'property': re.compile(r'^og:')}):
        value = tag.get('content')
        if value is None:
            # A property without a content attribute carries no value.
            continue
        # Split once so structured properties ("og:image:width") keep their
        # full name instead of collapsing onto the parent ("image").
        name = tag['property'].split(':', 1)[1]
        key = "{0}/{1}".format(OPENGRAPH_NAMESPACES[0], name)
        content.setdefault(key, []).append(value)
    return {url: content}
69 changes: 69 additions & 0 deletions PyOpenGraph/test/PyOpenGraphTest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#coding: utf-8
import unittest
from mock import patch
from nose.tools import *
from PyOpenGraph import PyOpenGraph as o

class MockResponse(object):
    """Test double that answers any method call with a fixture file's content.

    The fixture path is ``"<_file>.<_type>"``. Calling any attribute that is
    not otherwise defined on the instance (``read``, ``parse_url``, ...)
    returns the file's text, or the decoded JSON document when ``_type`` is
    ``'json'``.
    """

    def __init__(self, _file, _type='html', code=200, msg='OK', headers=None):
        """Store the fixture location and the fake response attributes.

        _file   -- fixture path without its extension.
        _type   -- fixture extension; ``'json'`` makes calls return parsed JSON.
        code    -- value exposed as ``self.code``.
        msg     -- value exposed as ``self.msg``.
        headers -- dict exposed as ``self.headers``; when omitted a fresh
                   ``{'content-type': 'text/plain; charset=utf-8'}`` is used.
        """
        if headers is None:
            # Build the default per instance so mutations never leak between
            # mocks, as they would with a shared dict default argument.
            headers = {'content-type': 'text/plain; charset=utf-8'}
        self.file_test = ("%s.%s" % (_file, _type))
        self._type = _type
        self.code = code
        self.msg = msg
        self.headers = headers

    def __getattr__(self, name):
        """Return a callable that ignores its arguments and loads the fixture."""
        def func(*args):
            # The context manager closes the fixture file after each read.
            with open(self.file_test) as handle:
                html = handle.read()
            if self._type == 'json':
                import json
                return json.loads(html)
            return html
        return func

    def parse_string(self, *arg):
        """Accept any arguments and do nothing (always returns None)."""
        pass


class PyOpenGraph(unittest.TestCase):
    """Checks PyOpenGraph against stored fixtures, with network access patched out."""

    @patch('PyOpenGraph.PyOpenGraph.rdfadict.RdfaParser')
    def test_for_zappos_web_site_the_lib_should_be_load_og_metas(self, p):
        """
        For the Zappos web site, the lib should load the og metas.
        """
        # The patched RdfaParser() returns the mock, whose parse_url() yields
        # the stored JSON fixture.
        p.return_value = MockResponse('contents/zappos', _type='json', headers={'content-type': 'text/javascript; charset=utf-8'})
        og = o.PyOpenGraph('http://www.zappos.com/timberland-pro-titan-safety-toe-oxford')
        self.assertEqual(og.metadata['title'], u'Timberland PRO TiTAN® Safety Toe Oxford')
        self.assertEqual(og.metadata['url'], 'http://www.zappos.com/timberland-pro-titan-safety-toe-oxford')
        self.assertEqual(og.metadata['type'], 'product')
        self.assertEqual(og.metadata['site_name'], 'Zappos.com')

    @patch('PyOpenGraph.PyOpenGraph.rdfadict.RdfaParser')
    def test_for_booking_web_site_the_lib_should_be_load_og_metas(self, p):
        """
        For the Booking.com web site, the lib should load the og metas.
        """
        p.return_value = MockResponse('contents/booking', _type='json', headers={'content-type': 'text/javascript; charset=utf-8'})
        og = o.PyOpenGraph('http://www.booking.com/hotel/br/best-western-sol-ipanema.pt-br.html')
        self.assertEqual(og.metadata['title'], 'Best Western Plus Sol Ipanema Hotel, Rio de Janeiro, BR')
        self.assertEqual(og.metadata['url'], 'http://www.booking.com/hotel/br/best-western-sol-ipanema.pt-br.html')
        self.assertEqual(og.metadata['type'], 'booking_com:hotel')
        self.assertEqual(og.metadata['site_name'], 'Booking.com')

    @patch('PyOpenGraph.PyOpenGraph.rdfadict.RdfaParser')
    @patch('PyOpenGraph.PyOpenGraph.urllib2.urlopen')
    def test_case_player_does_n_use_prefix_og_at_head_the_lib_should_be_process_with_beaut_soap(self, u, p):
        """
        When the page doesn't declare the og prefix in its head, the lib should process it with Beautiful Soup.
        """
        # Decorators apply bottom-up: ``u`` is the urlopen mock and ``p`` the
        # RdfaParser mock.
        p.return_value = MockResponse('contents/hotelurbano', _type='json', headers={'content-type': 'text/javascript; charset=utf-8'})
        u.return_value = MockResponse('contents/hotelurbano')

        og = o.PyOpenGraph('http://www.hotelurbano.com/pacote/rio-de-janeiro-angra-dos-reis-melia-angra/48795', prefix=False)
        self.assertEqual(og.metadata['title'], u'Angra dos Reis, Meliá Angra, 6x de R$ 70,00')
        self.assertEqual(og.metadata['type'], 'website')
        self.assertEqual(og.metadata['site_name'], 'hotelurbano.com')
        self.assertEqual(og.metadata['url'], 'http://www.hotelurbano.com/pacote/rio-de-janeiro-angra-dos-reis-melia-angra/48795?cmp=895')
Empty file added PyOpenGraph/test/__init__.py
Empty file.
133 changes: 133 additions & 0 deletions PyOpenGraph/test/contents/booking.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
{
"umhpppVR28": {
"http://www.w3.org/1999/02/22-rdf-syntax-ns#type": [
"http://rdf.data-vocabulary.org/#Breadcrumb"
]
},
"umhpppVR26": {
"http://rdf.data-vocabulary.org/#title": [
"Rio de Janeiro"
],
"http://www.w3.org/1999/02/22-rdf-syntax-ns#type": [
"http://rdf.data-vocabulary.org/#Breadcrumb"
],
"http://rdf.data-vocabulary.org/#url": [
"http://www.booking.com/city/br/rio-de-janeiro.pt-br.html"
]
},
"umhpppVR27": {
"http://rdf.data-vocabulary.org/#title": [
"Ipanema"
],
"http://www.w3.org/1999/02/22-rdf-syntax-ns#type": [
"http://rdf.data-vocabulary.org/#Breadcrumb"
],
"http://rdf.data-vocabulary.org/#url": [
"http://www.booking.com/district/br/rio-de-janeiro/ipanema.pt-br.html"
]
},
"umhpppVR24": {
"http://rdf.data-vocabulary.org/#title": [
"BR"
],
"http://www.w3.org/1999/02/22-rdf-syntax-ns#type": [
"http://rdf.data-vocabulary.org/#Breadcrumb"
],
"http://rdf.data-vocabulary.org/#url": [
"http://www.booking.com/country/br.pt-br.html"
]
},
"umhpppVR25": {
"http://rdf.data-vocabulary.org/#title": [
"Rio de Janeiro"
],
"http://www.w3.org/1999/02/22-rdf-syntax-ns#type": [
"http://rdf.data-vocabulary.org/#Breadcrumb"
],
"http://rdf.data-vocabulary.org/#url": [
"http://www.booking.com/region/br/rio-de-janeiro.pt-br.html"
]
},
"umhpppVR23": {
"http://www.w3.org/1999/02/22-rdf-syntax-ns#type": [
"http://rdf.data-vocabulary.org/#Breadcrumb"
]
},
"http://www.booking.com/hotel/br/best-western-sol-ipanema.pt-br.html": {
"http://www.w3.org/1999/xhtml/vocab#alternate": [
"http://www.booking.com/hotel/br/best-western-sol-ipanema.pt-br.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.lv.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.el.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.vi.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.bg.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.et.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.sv.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.th.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.ca.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.sk.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.fr.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.ja.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.de.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.id.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.sl.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.fi.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.ro.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.no.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.sr.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.hr.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.ru.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.pl.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.nl.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.is.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.ms.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.ko.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.en-gb.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.zh-tw.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.it.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.hu.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.tl.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.he.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.pt-pt.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.uk.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.cs.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.es.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.ar.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.da.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.zh-cn.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.lt.html",
"http://www.booking.com/hotel/br/best-western-sol-ipanema.tr.html"
],
"http://opengraphprotocol.org/schema/description": [
"O Best Western Plus Sol Ipanema oferece uma localiza\u00e7\u00e3o excelente em frente \u00e0 Praia de Ipanema, em uma \u00e1rea animada repleta de bares e restaurantes."
],
"http://opengraphprotocol.org/schema/url": [
"http://www.booking.com/hotel/br/best-western-sol-ipanema.pt-br.html"
],
"http://opengraphprotocol.org/schema/image": [
"http://q-ec.bstatic.com/images/hotel/max300/252/25216239.jpg"
],
"http://opengraphprotocol.org/schema/type": [
"booking_com:hotel"
],
"http://www.w3.org/1999/xhtml/vocab#stylesheet": [
"http://q-ec.bstatic.com/static/css/main_edgecast/6a85ab590af4b04e06bf83ef799467cb8004188c.css",
"http://q-ec.bstatic.com/static/css/main_exps_edgecast/2da3060f29845ca3473745de3d5240825d875fc2.css",
"http://q-ec.bstatic.com/static/css/popups_edgecast/bfd3379059eb247e7ee9bfe880796e8128ddc522.css",
"http://r-ec.bstatic.com/static/css/hotel_edgecast/24fab22e3ef051a6962a40d7c5657e485da0192f.css",
"http://q-ec.bstatic.com/static/css/generic_profile_edgecast/e7179b3e770a877d344a7a8aa3e48109da0adf18.css"
],
"http://www.w3.org/1999/xhtml/vocab#icon": [
"http://r-ec.bstatic.com/static/img/b25logo/favicon/ebc77706da3aae4aee7b05dadf182390f0d26d11.ico"
],
"http://www.w3.org/1999/xhtml/vocab#help": [
"http://www.booking.com/faq.pt-br.html"
],
"http://opengraphprotocol.org/schema/site_name": [
"Booking.com"
],
"http://opengraphprotocol.org/schema/title": [
"Best Western Plus Sol Ipanema Hotel, Rio de Janeiro, BR"
]
}
}
Loading