hydrus/include/HydrusNetworking.py

556 lines
20 KiB
Python

import ClientConstants as CC
import HydrusConstants as HC
import HydrusExceptions
import HydrusSerialisable
import httplib
import multipart
import os
import threading
import time
import urllib
import urlparse
import yaml
import wx
import HydrusData
import itertools
import HydrusGlobals
def AddHydrusCredentialsToHeaders( credentials, request_headers ):
if credentials.HasAccessKey():
access_key = credentials.GetAccessKey()
if access_key != '': request_headers[ 'Hydrus-Key' ] = access_key.encode( 'hex' )
else: raise Exception( 'No access key!' )
def AddHydrusSessionKeyToHeaders( service_key, request_headers ):
session_manager = wx.GetApp().GetManager( 'hydrus_sessions' )
session_key = session_manager.GetSessionKey( service_key )
request_headers[ 'Cookie' ] = 'session_key=' + session_key.encode( 'hex' )
def AddCookiesToHeaders( cookies, request_headers ):
request_headers[ 'Cookie' ] = '; '.join( [ k + '=' + v for ( k, v ) in cookies.items() ] )
def CheckHydrusVersion( service_key, service_type, response_headers ):
service_string = HC.service_string_lookup[ service_type ]
if 'server' not in response_headers or service_string not in response_headers[ 'server' ]:
wx.GetApp().Write( 'service_updates', { service_key : [ HydrusData.ServiceUpdate( HC.SERVICE_UPDATE_ACCOUNT, HydrusData.GetUnknownAccount() ) ] })
raise HydrusExceptions.WrongServiceTypeException( 'Target was not a ' + service_string + '!' )
server_header = response_headers[ 'server' ]
( service_string_gumpf, network_version ) = server_header.split( '/' )
network_version = int( network_version )
if network_version != HC.NETWORK_VERSION:
if network_version > HC.NETWORK_VERSION: message = 'Your client is out of date; please download the latest release.'
else: message = 'The server is out of date; please ask its admin to update to the latest release.'
raise HydrusExceptions.NetworkVersionException( 'Network version mismatch! The server\'s network version was ' + HydrusData.ToString( network_version ) + ', whereas your client\'s is ' + HydrusData.ToString( HC.NETWORK_VERSION ) + '! ' + message )
def ConvertHydrusGETArgsToQuery( request_args ):
if 'subject_identifier' in request_args:
subject_identifier = request_args[ 'subject_identifier' ]
del request_args[ 'subject_identifier' ]
data = subject_identifier.GetData()
if subject_identifier.HasAccountKey(): request_args[ 'subject_account_key' ] = data.encode( 'hex' )
elif subject_identifier.HasHash(): request_args[ 'subject_hash' ] = data.encode( 'hex' )
if subject_identifier.HasMapping():
( subject_hash, subject_tag ) = data
request_args[ 'subject_hash' ] = subject_hash.encode( 'hex' )
request_args[ 'subject_tag' ] = subject_tag.encode( 'hex' )
if 'title' in request_args:
request_args[ 'title' ] = request_args[ 'title' ].encode( 'hex' )
query = '&'.join( [ key + '=' + HydrusData.ToString( value ) for ( key, value ) in request_args.items() ] )
return query
def DoHydrusBandwidth( service_key, method, command, size ):
try: service = wx.GetApp().GetManager( 'services' ).GetService( service_key )
except: return
service_type = service.GetServiceType()
if ( service_type, method, command ) in HC.BANDWIDTH_CONSUMING_REQUESTS: HydrusGlobals.pubsub.pub( 'service_updates_delayed', { service_key : [ HydrusData.ServiceUpdate( HC.SERVICE_UPDATE_REQUEST_MADE, size ) ] } )
def GenerateDumpMultipartFormDataCTAndBody( fields ):
m = multipart.Multipart()
for ( name, field_type, value ) in fields:
if field_type in ( CC.FIELD_TEXT, CC.FIELD_COMMENT, CC.FIELD_PASSWORD, CC.FIELD_VERIFICATION_RECAPTCHA, CC.FIELD_THREAD_ID ): m.field( name, HydrusData.ToBytes( value ) )
elif field_type == CC.FIELD_CHECKBOX:
if value:
# spoiler/on -> name : spoiler, value : on
# we don't say true/false for checkboxes
( name, value ) = name.split( '/', 1 )
m.field( name, value )
elif field_type == CC.FIELD_FILE:
( hash, mime, file ) = value
m.file( name, hash.encode( 'hex' ) + HC.mime_ext_lookup[ mime ], file, { 'Content-Type' : HC.mime_string_lookup[ mime ] } )
return m.get()
def GenerateMultipartFormDataCTAndBodyFromDict( fields ):
m = multipart.Multipart()
for ( name, value ) in fields.items(): m.field( name, HydrusData.ToBytes( value ) )
return m.get()
def ParseURL( url ):
try:
starts_http = url.startswith( 'http://' )
starts_https = url.startswith( 'https://' )
if not starts_http and not starts_https: url = 'http://' + url
parse_result = urlparse.urlparse( url )
scheme = parse_result.scheme
hostname = parse_result.hostname
port = parse_result.port
if hostname is None: location = None
else: location = ( scheme, hostname, port )
path = parse_result.path
# this happens when parsing 'index.html' rather than 'hostname/index.html' or '/index.html'
if not path.startswith( '/' ): path = '/' + path
query = parse_result.query
except: raise Exception( 'Could not parse that URL' )
return ( location, path, query )
class HTTPConnectionManager( object ):
def __init__( self ):
self._connections = {}
self._lock = threading.Lock()
threading.Thread( target = self.DAEMONMaintainConnections, name = 'Maintain Connections' ).start()
def _DoRequest( self, method, location, path, query, request_headers, body, follow_redirects = True, report_hooks = None, temp_path = None, num_redirects_permitted = 4, long_timeout = False ):
if report_hooks is None: report_hooks = []
connection = self._GetConnection( location, long_timeout )
try:
if query == '': path_and_query = path
else: path_and_query = path + '?' + query
with connection.lock:
( parsed_response, redirect_info, size_of_response, response_headers, cookies ) = connection.Request( method, path_and_query, request_headers, body, report_hooks = report_hooks, temp_path = temp_path )
if redirect_info is None or not follow_redirects: return ( parsed_response, size_of_response, response_headers, cookies )
else:
if num_redirects_permitted == 0: raise Exception( 'Too many redirects!' )
( new_method, new_url ) = redirect_info
( new_location, new_path, new_query ) = ParseURL( new_url )
if new_location is None: new_location = location
return self._DoRequest( new_method, new_location, new_path, new_query, request_headers, body, follow_redirects = follow_redirects, report_hooks = report_hooks, temp_path = temp_path, num_redirects_permitted = num_redirects_permitted - 1, long_timeout = long_timeout )
except:
time.sleep( 2 )
raise
def _GetConnection( self, location, long_timeout = False ):
with self._lock:
if long_timeout: return HTTPConnection( location, long_timeout )
else:
if location not in self._connections:
connection = HTTPConnection( location )
self._connections[ location ] = connection
return self._connections[ location ]
def Request( self, method, url, request_headers = None, body = '', return_everything = False, return_cookies = False, report_hooks = None, temp_path = None, long_timeout = False ):
if request_headers is None: request_headers = {}
( location, path, query ) = ParseURL( url )
follow_redirects = not return_cookies
( response, size_of_response, response_headers, cookies ) = self._DoRequest( method, location, path, query, request_headers, body, follow_redirects = follow_redirects, report_hooks = report_hooks, temp_path = temp_path, long_timeout = long_timeout )
if return_everything: return ( response, size_of_response, response_headers, cookies )
elif return_cookies: return ( response, cookies )
else: return response
def DAEMONMaintainConnections( self ):
while True:
if HydrusGlobals.shutdown: break
last_checked = 0
if HydrusData.GetNow() - last_checked > 30:
with self._lock:
connections_copy = dict( self._connections )
for ( location, connection ) in connections_copy.items():
with connection.lock:
if connection.IsStale():
del self._connections[ location ]
last_checked = HydrusData.GetNow()
time.sleep( 1 )
class HTTPConnection( object ):
read_block_size = 64 * 1024
def __init__( self, location, long_timeout = False ):
( self._scheme, self._host, self._port ) = location
if long_timeout: self._timeout = 600
else: self._timeout = 30
self.lock = threading.Lock()
self._last_request_time = HydrusData.GetNow()
self._RefreshConnection()
def _ParseCookies( self, raw_cookies_string ):
cookies = {}
if raw_cookies_string is not None:
raw_cookie_strings = raw_cookies_string.split( ', ' )
for raw_cookie_string in raw_cookie_strings:
try:
# HSID=AYQEVnDKrdst; Domain=.foo.com; Path=/; Expires=Wed, 13 Jan 2021 22:23:01 GMT; HttpOnly
if ';' in raw_cookie_string: ( raw_cookie_string, gumpf ) = raw_cookie_string.split( ';', 1 )
( cookie_name, cookie_value ) = raw_cookie_string.split( '=' )
cookies[ cookie_name ] = cookie_value
except Exception as e: pass
return cookies
def _ParseResponse( self, response, report_hooks ):
content_length = response.getheader( 'Content-Length' )
if content_length is not None: content_length = int( content_length )
data = ''
for block in HydrusData.ReadFileLikeAsBlocks( response, self.read_block_size ):
if HydrusGlobals.shutdown: raise Exception( 'Application is shutting down!' )
data += block
if content_length is not None and len( data ) > content_length:
raise Exception( 'Response was longer than suggested!' )
for hook in report_hooks:
hook( content_length, len( data ) )
size_of_response = len( data )
content_type = response.getheader( 'Content-Type' )
if content_type is None: parsed_response = data
else:
if '; ' in content_type: ( mime_string, additional_info ) = content_type.split( '; ', 1 )
else: ( mime_string, additional_info ) = ( content_type, '' )
if 'charset=' in additional_info:
# this does utf-8, ISO-8859-4, whatever
( gumpf, charset ) = additional_info.split( '=' )
try: parsed_response = data.decode( charset )
except: parsed_response = data
elif content_type == 'application/x-yaml':
try: parsed_response = yaml.safe_load( data )
except yaml.error.YAMLError as e:
raise HydrusExceptions.NetworkVersionException( 'Failed to parse a response object!' + os.linesep + HydrusData.ToString( e ) )
elif content_type == 'application/json':
parsed_response = HydrusSerialisable.CreateFromNetworkString( data )
elif content_type == 'text/html':
try: parsed_response = data.decode( 'utf-8' )
except: parsed_response = data
else: parsed_response = data
return ( parsed_response, size_of_response )
def _RefreshConnection( self ):
if self._scheme == 'http': self._connection = httplib.HTTPConnection( self._host, self._port, timeout = self._timeout )
elif self._scheme == 'https': self._connection = httplib.HTTPSConnection( self._host, self._port, timeout = self._timeout )
try: self._connection.connect()
except: raise Exception( 'Could not connect to ' + HydrusData.ToString( self._host ) + '!' )
def _WriteResponseToPath( self, response, temp_path, report_hooks ):
content_length = response.getheader( 'Content-Length' )
if content_length is not None: content_length = int( content_length )
size_of_response = 0
with open( temp_path, 'wb' ) as f:
for block in HydrusData.ReadFileLikeAsBlocks( response, self.read_block_size ):
if HydrusGlobals.shutdown: raise Exception( 'Application is shutting down!' )
size_of_response += len( block )
if content_length is not None and size_of_response > content_length:
raise Exception( 'Response was longer than suggested!' )
f.write( block )
for hook in report_hooks:
hook( content_length, size_of_response )
return size_of_response
def IsStale( self ):
time_since_last_request = HydrusData.GetNow() - self._last_request_time
return time_since_last_request > self._timeout
def Request( self, method, path_and_query, request_headers, body, report_hooks = None, temp_path = None ):
if report_hooks is None: report_hooks = []
if method == HC.GET: method_string = 'GET'
elif method == HC.POST: method_string = 'POST'
if 'User-Agent' not in request_headers: request_headers[ 'User-Agent' ] = 'hydrus/' + HydrusData.ToString( HC.NETWORK_VERSION )
# it is important to only send str, not unicode, to httplib
# it uses += to extend the message body, which propagates the unicode (and thus fails) when
# you try to push non-ascii bytes as the body (e.g. during a file upload!)
method_string = str( method_string )
path_and_query = str( path_and_query )
request_headers = { str( k ) : str( v ) for ( k, v ) in request_headers.items() }
try:
self._connection.request( method_string, path_and_query, headers = request_headers, body = body )
response = self._connection.getresponse()
except ( httplib.CannotSendRequest, httplib.BadStatusLine ):
# for some reason, we can't send a request on the current connection, so let's make a new one and try again!
self._RefreshConnection()
self._connection.request( method_string, path_and_query, headers = request_headers, body = body )
response = self._connection.getresponse()
if response.status == 200 and temp_path is not None:
size_of_response = self._WriteResponseToPath( response, temp_path, report_hooks )
parsed_response = 'response written to temporary file'
else:
( parsed_response, size_of_response ) = self._ParseResponse( response, report_hooks )
response_headers = { k : v for ( k, v ) in response.getheaders() if k != 'set-cookie' }
cookies = self._ParseCookies( response.getheader( 'set-cookie' ) )
self._last_request_time = HydrusData.GetNow()
if response.status == 200: return ( parsed_response, None, size_of_response, response_headers, cookies )
elif response.status in ( 301, 302, 303, 307 ):
location = response.getheader( 'Location' )
if location is None: raise Exception( 'Received an invalid redirection response.' )
else:
url = location
if ' ' in url:
# some booru is giving daft redirect responses
print( url )
url = urllib.quote( url, safe = '/?=&' )
print( url )
if not url.startswith( self._scheme ):
# assume it is like 'index.php' or '/index.php', rather than 'http://blah.com/index.php'
if url.startswith( '/' ): slash_sep = ''
else: slash_sep = '/'
url = self._scheme + '://' + self._host + slash_sep + url
if response.status in ( 301, 307 ):
# 301: moved permanently, repeat request
# 307: moved temporarily, repeat request
redirect_info = ( method, url )
elif response.status in ( 302, 303 ):
# 302: moved temporarily, repeat request (except everyone treats it like 303 for no good fucking reason)
# 303: thanks, now go here with GET
redirect_info = ( HC.GET, url )
return ( parsed_response, redirect_info, size_of_response, response_headers, cookies )
elif response.status == 304: raise HydrusExceptions.NotModifiedException()
else:
if response.status == 401: raise HydrusExceptions.PermissionException( parsed_response )
elif response.status == 403: raise HydrusExceptions.ForbiddenException( parsed_response )
elif response.status == 404: raise HydrusExceptions.NotFoundException( parsed_response )
elif response.status == 419: raise HydrusExceptions.SessionException( parsed_response )
elif response.status == 426: raise HydrusExceptions.NetworkVersionException( parsed_response )
elif response.status in ( 500, 501, 502, 503 ): raise Exception( parsed_response )
else: raise Exception( parsed_response )