为了账号安全,请及时绑定邮箱和手机立即绑定

Python请求和持久会话

Python请求和持久会话

POPMUISE 2019-10-15 14:56:37
我正在使用requests模块(版本0.10.0,Python 2.5)。我已经弄清楚了如何将数据提交到网站上的登录表单并检索会话密钥,但是我看不到在后续请求中使用此会话密钥的明显方法。有人可以在下面的代码中填写省略号,或者建议其他方法吗?

>>> import requests
>>> login_data =  {'formPosted':'1', 'login_email':'me@example.com', 'password':'pw'}
>>> r = requests.post('https://localhost/login.py', login_data)
>>> 
>>> r.text
u'You are being redirected <a href="profilePage?_ck=1349394964">here</a>'
>>> r.cookies
{'session_id_myapp': '127-0-0-1-825ff22a-6ed1-453b-aebc-5d3cf2987065'}
>>> 
>>> r2 = requests.get('https://localhost/profile_data.json', ...)
查看完整描述

3 回答

?
HUH函数

TA贡献1836条经验 获得超4个赞

其他答案有助于了解如何维护此类会话。另外,我想提供一个类,该类可以使会话在脚本的多次运行之间(借助缓存文件)得以保持。这意味着仅在需要时(会话超时或缓存中不存在会话)才执行真正的“登录”。它还支持在后续的“get”或“post”调用中使用代理设置。


已通过Python3测试。


您可以将它用作自己代码的基础。以下代码片段以GPL v3许可发布。


import pickle

import datetime

import os

from urllib.parse import urlparse

import requests    


class MyLoginSession:
    """
    Handle a logged-in ``requests`` session and persist it between script runs.

    The session object is pickled to a cache file named after the login
    URL's host so that later script executions can reuse it instead of
    logging in again. Proxy settings are remembered and applied to every
    request issued through this class.
    """

    def __init__(self,
                 loginUrl,
                 loginData,
                 loginTestUrl,
                 loginTestString,
                 sessionFileAppendix='_session.dat',
                 maxSessionTimeSeconds=30 * 60,
                 proxies=None,
                 userAgent='Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
                 debug=True,
                 forceLogin=False,
                 **kwargs):
        """
        Store the login configuration and immediately log in.

        loginUrl              -- URL the login data is POSTed to.
        loginData             -- dictionary (id : value) sent as POST data.
        loginTestUrl          -- URL fetched after login to verify it worked.
        loginTestString       -- text searched for (case-insensitively) in the
                                 response of 'loginTestUrl'; its presence means
                                 the login succeeded.
        sessionFileAppendix   -- suffix appended to the login URL's host to
                                 form the cache file name.
        maxSessionTimeSeconds -- maximum age of the cache file before a new
                                 login is performed.
        proxies               -- dictionary of the format
                                 { 'https' : 'https://user:pass@server:port',
                                   'http' : ... } or None.
        userAgent             -- value of the 'user-agent' header set on newly
                                 created sessions.
        debug                 -- print progress messages when true.
        forceLogin            -- ignore any cached session and log in anew.
        **kwargs              -- forwarded to the login POST request.

        Raises Exception if 'loginTestString' cannot be found after login.
        """
        urlData = urlparse(loginUrl)

        self.proxies = proxies
        self.loginData = loginData
        self.loginUrl = loginUrl
        self.loginTestUrl = loginTestUrl
        self.maxSessionTime = maxSessionTimeSeconds
        # relative path: the cache file is created in the current working directory
        self.sessionFile = urlData.netloc + sessionFileAppendix
        self.userAgent = userAgent
        self.loginTestString = loginTestString
        self.debug = debug

        self.login(forceLogin, **kwargs)

    def modification_date(self, filename):
        """
        Return the last modification time of 'filename' as a datetime object.
        """
        t = os.path.getmtime(filename)
        return datetime.datetime.fromtimestamp(t)

    def _loadSessionFromCache(self):
        """
        Try to restore self.session from the cache file.

        Returns True when a session was loaded. Returns False when the cache
        file is missing, is 'maxSessionTime' seconds old or older, or cannot
        be unpickled; self.session is left untouched in those cases.
        """
        if not os.path.exists(self.sessionFile):
            return False

        # total_seconds() rather than .seconds: .seconds drops the days
        # component, so a file older than 24h would wrongly look fresh.
        elapsed = datetime.datetime.now() - self.modification_date(self.sessionFile)
        lastModification = elapsed.total_seconds()
        if lastModification >= self.maxSessionTime:
            return False

        # NOTE(security): pickle.load can execute arbitrary code. Only use
        # this class where nobody else is able to write the cache file.
        try:
            with open(self.sessionFile, "rb") as f:
                cachedSession = pickle.load(f)
        except (pickle.UnpicklingError, EOFError):
            # empty or truncated cache file: fall back to a proper login
            return False

        self.session = cachedSession
        if self.debug:
            print("loaded session from cache (last access %ds ago) "
                  % lastModification)
        return True

    def _createSession(self, **kwargs):
        """
        Create a new session, POST the login data and write the cache file.
        """
        self.session = requests.Session()
        self.session.headers.update({'user-agent' : self.userAgent})
        self.session.post(self.loginUrl, data=self.loginData,
                          proxies=self.proxies, **kwargs)

        if self.debug:
            print('created new session with login')
        self.saveSessionToCache()

    def _isLoggedIn(self):
        """
        Fetch 'loginTestUrl' and report whether 'loginTestString' occurs in it.
        """
        # use the same proxies as every other request made by this class
        res = self.session.get(self.loginTestUrl, proxies=self.proxies)
        return self.loginTestString.lower() in res.text.lower()

    def login(self, forceLogin=False, **kwargs):
        """
        Log in, reusing the cached session when possible.

        Unless 'forceLogin' is true, the last saved session is read from the
        cache file. If that is not possible (no file, file too old, file
        unreadable) a proper login is performed and the cache file is
        rewritten. A cached session that no longer passes the login test is
        replaced by one fresh login before giving up.

        **kwargs are forwarded to the login POST request.

        Raises Exception if 'loginTestString' is not found in the response of
        'loginTestUrl'.
        """
        if self.debug:
            print('loading or generating session...')

        wasReadFromCache = (not forceLogin) and self._loadSessionFromCache()
        if not wasReadFromCache:
            self._createSession(**kwargs)

        loggedIn = self._isLoggedIn()
        if not loggedIn and wasReadFromCache:
            # the cache file was recent enough, but the server no longer
            # accepts the session: log in again instead of failing outright
            if self.debug:
                print('cached session is no longer valid')
            self._createSession(**kwargs)
            loggedIn = self._isLoggedIn()

        if not loggedIn:
            raise Exception("could not log into provided site '%s'"
                            " (did not find successful login string)"
                            % self.loginUrl)

    def saveSessionToCache(self):
        """
        Pickle the current session into the cache file.

        Rewriting the file also refreshes its modification time, which is
        what login() uses to decide whether the cache is still usable.
        """
        with open(self.sessionFile, "wb") as f:
            pickle.dump(self.session, f)

        if self.debug:
            print('updated session cache-file %s' % self.sessionFile)

    def retrieveContent(self, url, method="get", postData=None, **kwargs):
        """
        Request 'url' through the session and return the response object.

        If 'method' is not 'get', the url is called with 'postData' as a
        post request. **kwargs are forwarded to the underlying request call.
        The cache file is rewritten after every request.
        """
        if method == 'get':
            res = self.session.get(url, proxies=self.proxies, **kwargs)
        else:
            res = self.session.post(url, data=postData, proxies=self.proxies, **kwargs)

        # the session has been updated on the server, so also update in cache
        self.saveSessionToCache()

        return res

使用上述类的代码片段可能如下所示:


if __name__ == "__main__":
    # To route every request through a proxy, define the dictionary below
    # and enable the 'proxies' argument in the constructor calls.
    # proxies = {'https' : 'https://user:pass@server:port',
    #           'http' : 'http://user:pass@server:port'}

    loginUrl = 'https://...'
    loginTestUrl = 'https://...'
    successStr = 'Hello Tom'
    loginData = dict(user='usr', password='pwd')

    # Form login: the credentials are sent as ordinary POST data.
    s = MyLoginSession(
        loginUrl,
        loginData,
        loginTestUrl,
        successStr,
        # proxies=proxies,
    )

    res = s.retrieveContent('https://....')
    print(res.text)

    # JSON login: no form data is sent; the credentials are handed over
    # as the 'json' keyword argument of the login request instead.
    s = MyLoginSession(
        loginUrl,
        None,
        loginTestUrl,
        successStr,
        # proxies=proxies,
        json=loginData,
    )


查看完整回答
反对 回复 2019-10-15
  • 3 回答
  • 0 关注
  • 521 浏览
慕课专栏
更多

添加回答

举报

0/150
提交
取消
意见反馈 帮助中心 APP下载
官方微信