Python script for uploading RSS feeds

The Python script allows you to upload an RSS feed to Yandex.Webmaster. The script sequentially sends requests to the Yandex.Turbo pages API and reports the result of the RSS feed upload.

To work with the script, you only need to specify the site address, your OAuth token and the RSS feed content. Other data (user-id, host-id and so on) is obtained by the script automatically.

Setting up the upload mode
The upload mode is set in the get_rss_upload_path function when declaring the path variable. The debugging mode DEBUG is set by default.
...
def get_rss_upload_path(user_id, host_id):
    path = '/user/{user_id}/hosts/{host_id}/turbo/uploadAddress/?mode={mode}'.format(
        user_id=user_id, host_id=host_id, mode='DEBUG')
...
To publish Turbo pages, set the PRODUCTION mode.
...
def get_rss_upload_path(user_id, host_id):
    path = '/user/{user_id}/hosts/{host_id}/turbo/uploadAddress/?mode={mode}'.format(
        user_id=user_id, host_id=host_id, mode='PRODUCTION')
...
Setting up compression
To send the RSS feed in a compressed form, specify the Content-Encoding: gzip header in the upload_rss function.
...
def upload_rss(upload_path, rss_data):
    headers = {
        'Content-Type': 'application/rss+xml',
        'Content-Encoding': 'gzip'
    }
...

Using the script

To upload the RSS feed, add your own data to the script:
  • The URL of the site the RSS feed is loaded for.
  • OAuth token.
  • RSS feed content.

    For a test run you can use the sample RSS feed.
    Example
    <?xml version = "1.0" encoding = "UTF-8"?>
    <rss version="2.0" xmlns:yandex="http://news.yandex.ru" xmlns:turbo="http://turbo.yandex.ru">
      <channel>
        <item turbo="true">
          <title>Page title</title>
          <link>https://example.com</link>
          <turbo:content>
            <![CDATA[
              <header>
                <h1>The Healthy breakfast restaurant</h1>
                <h2>Healthy and delicious</h2>
                <figure>
                  <img src="https://avatars.mds.yandex.net/get-sbs-sd/403988/e6f459c3-8ada-44bf-a6c9-dbceb60f3757/orig">
                </figure>
                <menu>
                  <a href="https://example.com/page1.html">Menu entry  1</a>
                  <a href="https://example.com/page2.html">Menu entry  2</a>
                </menu>
              </header>
              <p>What is a good way to start the day? Have a delicious and healthy breakfast!</p>
              <p>Come to us for breakfast. Find the photos of our dishes <a href="#">on our site</a>.</p>
              <h2>Menu</h2>
              <figure>
                <img src="https://avatars.mds.yandex.net/get-sbs-sd/369181/49e3683c-ef58-4067-91f9-786222aa0e65/orig">
                <figcaption>Omelette with herbs</figcaption>
              </figure>
              <p>Our menu always contains fresh, tasty and healthy dishes.</p>
              <p>Find it out yourself.</p>
              <button formaction="tel:+7(123)456-78-90"
                data-background-color="#5B97B0"
                data-color="white"
                data-primary="true">Reserve a table</button>
              <div data-block="widget-feedback" data-stick="false">
                <div data-block="chat" data-type="whatsapp" data-url="https://whatsapp.com"></div>
                <div data-block="chat" data-type="telegram" data-url="http://telegram.com/"></div>
                <div data-block="chat" data-type="vkontakte" data-url="https://vk.com/"></div>
                <div data-block="chat" data-type="facebook" data-url="https://facebook.com"></div>
                <div data-block="chat" data-type="viber" data-url="https://viber.com"></div>
              </div>
              <p>Our address: <a href="#">Nullam dolor massa, porta a nulla in, ultricies vehicula arcu.</a></p>
              <p>Photos — http://unsplash.com</p>
            ]]>
          </turbo:content>
        </item>
      </channel>
    </rss>
import json
import pprint
import time

try:
    # Python 3 location of urlparse.
    from urllib.parse import urlparse
except ImportError:
    # Python 2 fallback: the module this script originally imported from.
    from urlparse import urlparse

import requests
from requests import HTTPError

# Placeholders: replace these three values with your own data before running.
HOST_ADDRESS = 'Your site URL For example, https://example.com'
OAUTH_TOKEN = 'Your OAuth token'
RSS_STRING = 'The RSS feed content'

# Authorization header built from the OAuth token.
AUTH_HEADER = {
    'Authorization': 'OAuth %s' % OAUTH_TOKEN
}

# A single shared session; the authorization header is registered on it once
# and every function below sends its requests through it.
SESSION = requests.Session()
SESSION.headers.update(AUTH_HEADER)

# Base URL of the API, including the version segment.
API_VERSION = 'v4'
API_BASE_URL = 'https://api.webmaster.yandex.net'
API_URL = API_BASE_URL + '/' + API_VERSION


def validate_api_response(response, required_key_name=None):
    """Check an API response and return its decoded JSON body.

    Args:
        response: Response object exposing ``headers``, ``text``,
            ``status_code`` and ``raise_for_status()``.
        required_key_name: Optional key that must be present in the decoded
            body of a 200 response.

    Returns:
        The decoded JSON body, or None when the response does not declare
        ``application/json`` in its Content-Type header.

    Raises:
        HTTPError: If a 200 response lacks ``required_key_name``, if a
            non-200 response body carries an ``error_message``, or (through
            ``raise_for_status()``) for other error statuses.
    """
    # A response without a Content-Type header is treated as "not JSON"
    # instead of failing with KeyError.
    content_type = response.headers.get('Content-Type') or ''
    content = json.loads(response.text) if 'application/json' in content_type else None

    if response.status_code == 200:
        # ``content`` is None for a non-JSON body; report that as a missing
        # key rather than letting ``in`` raise TypeError on None.
        if required_key_name and (content is None or required_key_name not in content):
            raise HTTPError('Unexpected API response. Missing required key: %s' % required_key_name, response=response)
    elif content and 'error_message' in content:
        raise HTTPError('Error API response. Error message: %s' % content['error_message'], response=response)
    else:
        response.raise_for_status()

    return content


def url_to_host_id(url):
    """Build the host identifier string for a site URL.

    The result has the form ``<scheme>:<hostname>:<port>``, for example
    ``https:example.com:443``. When the URL has no explicit port, 80 is used
    for http and 443 for https. The hostname is IDNA-encoded, stripped of
    trailing dots and lower-cased.

    Args:
        url: Site URL including the protocol, e.g. ``https://example.com``.

    Returns:
        The host identifier string.

    Raises:
        ValueError: If the URL has no protocol, a protocol other than http or
            https, or no hostname.
    """
    parsed_url = urlparse(url)

    scheme = parsed_url.scheme
    if not scheme:
        raise ValueError('No protocol (https or http) in url')

    if scheme not in ('http', 'https'):
        raise ValueError('Illegal protocol: %s' % scheme)

    port = parsed_url.port
    if not port:
        port = 80 if scheme == 'http' else 443

    hostname = parsed_url.hostname
    if not hostname:
        # Without this check the encode() call below fails on None with an
        # unhelpful AttributeError.
        raise ValueError('No hostname in url')

    # encode('idna') returns bytes on Python 3; decode back to text so the
    # string operations and the concatenation below work on both Python 2
    # and Python 3.
    hostname = hostname.encode('idna').decode('ascii').rstrip('.').lower()

    return scheme + ':' + hostname + ':' + str(port)


def get_user_id():
    """Request the ``/user/`` resource and return its ``user_id`` value.

    Raises:
        HTTPError: If the response fails validation or has no ``user_id``.
    """
    response = SESSION.get(API_URL + '/user/')
    return validate_api_response(response, 'user_id')['user_id']


def get_user_host_ids(user_id):
    """Return the ``host_id`` of every entry in the user's ``hosts`` list.

    Args:
        user_id: User identifier placed into the request path.

    Raises:
        HTTPError: If the response fails validation or has no ``hosts`` key.
    """
    hosts_url = API_URL + '/user/{user_id}/hosts'.format(user_id=user_id)
    payload = validate_api_response(SESSION.get(hosts_url), 'hosts')

    return [entry['host_id'] for entry in payload['hosts']]


def is_user_host_id(user_id, host_id):
    """Tell whether ``host_id`` is among the host ids listed for ``user_id``."""
    return host_id in get_user_host_ids(user_id)


def get_rss_upload_path(user_id, host_id):
    """Request the address the RSS feed must be posted to.

    The upload mode is fixed by the ``mode`` value in the request path:
    'DEBUG' here; change it to 'PRODUCTION' to publish Turbo pages.

    Args:
        user_id: User identifier placed into the request path.
        host_id: Host identifier placed into the request path.

    Returns:
        The ``upload_address`` value from the API response.

    Raises:
        HTTPError: If the response fails validation or has no
            ``upload_address`` key.
    """
    path = '/user/{user_id}/hosts/{host_id}/turbo/uploadAddress/?mode={mode}'.format(
        user_id=user_id, host_id=host_id, mode='DEBUG')

    r = SESSION.get(API_URL + path)
    c = validate_api_response(r, 'upload_address')

    return c['upload_address']


def upload_rss(upload_path, rss_data):
    """Post the RSS feed to the upload address and return the task id.

    Args:
        upload_path: Full URL to post to.
        rss_data: RSS feed content sent as the request body.

    Returns:
        The ``task_id`` value from the API response.

    Raises:
        HTTPError: If the response fails validation or has no ``task_id``.
    """
    headers = {
        'Content-Type': 'application/rss+xml'
    }

    r = SESSION.post(url=upload_path, data=rss_data, headers=headers)
    c = validate_api_response(r, 'task_id')

    return c['task_id']


def get_task_info(user_id, host_id, task_id):
    """Fetch the decoded API response describing an upload task.

    Args:
        user_id: User identifier placed into the request path.
        host_id: Host identifier placed into the request path.
        task_id: Task identifier placed into the request path.

    Returns:
        The decoded response body (None if the response is not JSON).

    Raises:
        HTTPError: If the response fails validation.
    """
    path = '/user/{user_id}/hosts/{host_id}/turbo/tasks/{task_id}'.format(
        user_id=user_id, host_id=host_id, task_id=task_id)

    r = SESSION.get(API_URL + path)
    c = validate_api_response(r)

    return c


def retry_call_until(func, predicate, max_tries=5, initial_delay=60, backoff=2):
    """Call ``func`` repeatedly until ``predicate`` accepts its result.

    ``func`` is called once and then retried up to ``max_tries`` more times.
    Before each retry the function sleeps; the delay starts at
    ``initial_delay`` seconds and is multiplied by ``backoff`` after every
    sleep.

    Args:
        func: Zero-argument callable to invoke.
        predicate: One-argument callable; a truthy result stops the retries.
        max_tries: Maximum number of retries after the first call.
        initial_delay: Seconds to sleep before the first retry.
        backoff: Multiplier applied to the delay after each sleep.

    Returns:
        The last value returned by ``func``, whether or not ``predicate``
        accepted it. None if ``max_tries`` is negative (no call is made).
    """
    current_delay = initial_delay

    ret_val = None
    for n_try in range(0, max_tries + 1):
        ret_val = func()
        if predicate(ret_val):
            break

        # This was the final attempt: no retry follows, so announcing one and
        # sleeping would only delay the return.
        if n_try == max_tries:
            break

        print('Will retry. Sleeping for %ds' % current_delay)
        time.sleep(current_delay)
        current_delay *= backoff

    return ret_val


# Resolve the identifiers the API needs, then upload the feed.
user_id = get_user_id()
host_id = url_to_host_id(HOST_ADDRESS)
upload_path = get_rss_upload_path(user_id, host_id)
task_id = upload_rss(upload_path, RSS_STRING)

# Poll the task until its load_status is no longer 'PROCESSING' (or the
# retries run out).
print('Waiting for the upload task to complete. This will take a while...')
task_info = retry_call_until(
    func=lambda: get_task_info(user_id, host_id, task_id),
    predicate=lambda info: info['load_status'] != 'PROCESSING')

print('Task status: %s' % task_info['load_status'])

# Fetch the task once more and pretty-print the whole response.
task_info = get_task_info(user_id, host_id, task_id)
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(task_info)