Repos / pytaku / 8e5f14292e
commit 8e5f14292e1fb641f81828f67c532519cc5d53b5
Author: Bùi Thành Nhân <hi@imnhan.com>
Date:   Sun May 24 23:08:45 2020 +0700

    proxied GET requests

diff --git a/src/pytaku/conf.py b/src/pytaku/conf.py
index 6db70cc..ebe84b7 100644
--- a/src/pytaku/conf.py
+++ b/src/pytaku/conf.py
@@ -17,7 +17,23 @@ class Config(GoodConf):
     AWS_SECRET_ACCESS_KEY = Value()
     AWS_STORAGE_BUCKET_NAME = Value(default="pytaku")
     AWS_DEFAULT_ACL = Value(default="public-read")
-    AWS_S3_ENDPOINT_URL = Value(default="")
+    AWS_S3_ENDPOINT_URL = Value()
+
+    # Makeshift HTTPS "proxy" running on Google Cloud Functions:
+    GCF_PROXY_PROJECT_NAME = Value()
+    GCF_PROXY_FUNCTION_NAME = Value()
+    GCF_PROXY_PASSWORD = Value()
+    GCF_PROXY_REGIONS = Value(
+        default=[
+            "asia-east2",
+            "asia-northeast1",
+            "europe-west1",
+            "europe-west2",
+            "us-central1",
+            "us-east1",
+            "us-east4",
+        ]
+    )
 
 
 config = Config(default_files=["pytaku.conf.json"])
diff --git a/src/pytaku/settings.py b/src/pytaku/settings.py
index 3795e7d..3d3a2fc 100644
--- a/src/pytaku/settings.py
+++ b/src/pytaku/settings.py
@@ -131,3 +131,9 @@
 AWS_STORAGE_BUCKET_NAME = config.AWS_STORAGE_BUCKET_NAME
 AWS_S3_ENDPOINT_URL = config.AWS_S3_ENDPOINT_URL or None
 AWS_DEFAULT_ACL = None  # use bucket's default policy
+
+
+GCF_PROXY_PROJECT_NAME = config.GCF_PROXY_PROJECT_NAME  # project part of the proxy hostname
+GCF_PROXY_FUNCTION_NAME = config.GCF_PROXY_FUNCTION_NAME  # path segment of the proxy URL
+GCF_PROXY_PASSWORD = config.GCF_PROXY_PASSWORD  # sent as "password" in each proxied request body
+GCF_PROXY_REGIONS = config.GCF_PROXY_REGIONS  # HttpClient picks one region from this by index
diff --git a/src/pytaku_scraper/commands.py b/src/pytaku_scraper/commands.py
index b992209..3b4d0fa 100644
--- a/src/pytaku_scraper/commands.py
+++ b/src/pytaku_scraper/commands.py
@@ -1,7 +1,7 @@
-import requests
 from django.db import transaction
 from django.utils import timezone
 
+from .httpclient import HttpClient
 from .models import DownloadResult, TaskQueue
 from .sites.mangadex import get_latest_id
 
@@ -20,14 +20,15 @@ def put_download_tasks():
     print(f'Successfully put {len(result)} "download" tasks.')
 
 
-def download_worker():
+def download_worker(proxy_index):
+    http = HttpClient(proxy_index)
     while True:
         with transaction.atomic():
             task = TaskQueue.pop("download")
             task_id = task.id
             print(f"Processing task {task_id}: {task.payload}")
-            resp = requests.get(task.payload["url"], timeout=30)
-            assert resp.status_code in (200, 404), f"Unexpected error: {resp.text}"
+            resp = http.proxied_get(task.payload["url"])
+            assert resp.status_code in (200, 404), f"Unexpected DL error: {resp.text}"
 
             DownloadResult.objects.update_or_create(
                 url=task.payload["url"],
diff --git a/src/pytaku_scraper/httpclient.py b/src/pytaku_scraper/httpclient.py
new file mode 100644
index 0000000..bf9f2d7
--- /dev/null
+++ b/src/pytaku_scraper/httpclient.py
@@ -0,0 +1,58 @@
+import requests
+from django.conf import settings
+
+HEADERS = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
+}
+
+
+class HttpClient:
+    """HTTP client that sends requests through a Google Cloud Functions proxy.
+
+    Each instance is bound to a single proxy region, selected from
+    ``settings.GCF_PROXY_REGIONS`` by index.
+    """
+
+    def __init__(self, proxy_index=0):
+        """Build the proxy URL for the region at ``proxy_index``.
+
+        Raises:
+            IndexError: if ``proxy_index`` is outside the configured regions.
+        """
+        regions = settings.GCF_PROXY_REGIONS
+        try:
+            proxy_region = regions[proxy_index]
+        except IndexError:
+            # Re-raise with the valid range so a mistyped worker argument is
+            # easy to diagnose.
+            raise IndexError(
+                f"proxy_index {proxy_index} is out of range: "
+                f"{len(regions)} proxy region(s) configured"
+            ) from None
+        self.proxy_url = "https://{}-{}.cloudfunctions.net/{}".format(
+            proxy_region,
+            settings.GCF_PROXY_PROJECT_NAME,
+            settings.GCF_PROXY_FUNCTION_NAME,
+        )
+        # One session per client so repeated calls to the same proxy URL can
+        # reuse the underlying connection instead of reconnecting each time.
+        self.session = requests.Session()
+        print("HttpClient proxy region:", proxy_region)
+
+    def proxied_get(self, url, timeout=10):
+        """Ask the proxy to GET ``url`` and return the proxy's response.
+
+        The call to the proxy is itself a POST whose JSON body describes the
+        wanted request; ``timeout`` (seconds) applies to that POST.
+        """
+        return self.session.post(
+            self.proxy_url,
+            json={
+                "url": url,
+                "method": "get",
+                "body": None,
+                "headers": HEADERS,
+                "password": settings.GCF_PROXY_PASSWORD,
+            },
+            timeout=timeout,
+        )
diff --git a/src/pytaku_scraper/management/commands/download_worker.py b/src/pytaku_scraper/management/commands/download_worker.py
index 87f3f75..bf357b0 100644
--- a/src/pytaku_scraper/management/commands/download_worker.py
+++ b/src/pytaku_scraper/management/commands/download_worker.py
@@ -6,5 +6,8 @@
 class Command(BaseCommand):
     help = "Download worker. Run as many as needed."
 
+    def add_arguments(self, parser):
+        parser.add_argument("proxy_index", type=int)  # required positional, parsed as int
+
     def handle(self, *args, **options):
-        download_worker()
+        download_worker(options["proxy_index"])  # forwards the parsed index to the worker loop