From c0d28da1c3ff5ba916f98c6549f5b5afb00928ba Mon Sep 17 00:00:00 2001 From: varuy322 Date: Fri, 11 Nov 2022 10:48:33 +0800 Subject: [PATCH 01/29] refactor: v0.0.3, env is prod --- opendatalab/__version__.py | 4 +- opendatalab/cli/info.py | 102 +++++++++++++++++-------------------- opendatalab/cli/login.py | 1 - opendatalab/cli/logout.py | 2 +- opendatalab/cli/search.py | 33 ++++++++---- opendatalab/client/api.py | 41 ++++----------- opendatalab/exception.py | 2 +- setup.cfg | 8 +-- setup.py | 4 +- test-requirements.txt | 1 - tests/demo.py | 15 +++--- 11 files changed, 98 insertions(+), 115 deletions(-) delete mode 100644 test-requirements.txt diff --git a/opendatalab/__version__.py b/opendatalab/__version__.py index 9e1f177..6d1372b 100644 --- a/opendatalab/__version__.py +++ b/opendatalab/__version__.py @@ -8,7 +8,7 @@ """OpenDataLab python SDK version info.""" __url__ = "https://opendatalab.org.cn" -__version__ = "0.0.2" -__svc__ = '1.8' +__version__ = "0.0.3" +__svc__ = '2.0' odl_clientId = "kmz3bkwzlaa3wrq8pvwa" uaa_url_prefix = "https://sso.openxlab.org.cn/gw/uaa-be" diff --git a/opendatalab/cli/info.py b/opendatalab/cli/info.py index afff4d4..67e9870 100644 --- a/opendatalab/cli/info.py +++ b/opendatalab/cli/info.py @@ -11,90 +11,82 @@ from opendatalab.utils import bytes2human -@exception_handler -def implement_info(obj: ContextInfo, dataset: str) -> None: - """ - implement for displaying dataset info - Args: - obj (ContextInfo): context object - dataset (str): dataset name +def _format_types(info_data, type_name): + types_str = "" + if type_name in info_data['attrs'].keys(): + types_list = info_data['attrs'][type_name] + if types_list and len(types_list) > 0: + types_str = ", ".join([x['name']['en'] for x in types_list]) - Returns: + return types_str - """ - client = obj.get_client() - odl_api = client.get_api() - info_data = odl_api.get_info(dataset) - similar_data_list = odl_api.get_similar_dataset(dataset) - data_introd = info_data['introduction'] +def 
reformat_info_data(info_data): + license_str = _format_types(info_data, 'license') + publisher_str = _format_types(info_data, 'publisher') + media_types_str = _format_types(info_data, 'mediaTypes') + label_types_str = _format_types(info_data, 'labelTypes') + task_types_str = _format_types(info_data, 'taskTypes') + tags_str = _format_types(info_data, 'tags') + + data_introduction = info_data['introduction']['en'] introduction_str = "" - if data_introd and len(data_introd) > 0: - introduction_str = data_introd[:97] + '...' - - license_list = info_data['licenses'] - license_str = "" - if license_list and len(license_list) > 0: - license_str = ", ".join([x['name'] for x in license_list]) - - publisher_list = info_data['publisher'] - publisher_str = "" - if publisher_list and len(publisher_list) > 0: - publisher_str = ", ".join([x['name'] for x in publisher_list]) - - media_types_list = info_data['mediaTypes'] - media_types_str = "" - if media_types_list and len(media_types_list) > 0: - media_types_str = ", ".join([x['name'] for x in media_types_list]) - - label_types_list = info_data['labelTypes'] - label_types_str = "" - if label_types_list and len(label_types_list) > 0: - label_types_str = ", ".join([x['name'] for x in label_types_list]) - - task_types_list = info_data['taskTypes'] - task_types_str = "" - if label_types_list and len(task_types_list) > 0: - task_types_str = ", ".join([x['name'] for x in task_types_list]) - - tags_list = info_data['tags'] - tags_str = "" - if tags_list and len(tags_list) > 0: - tags_str = ", ".join([x['name'] for x in tags_list]) - - citation_data = info_data['citation'] + if data_introduction and len(data_introduction) > 0: + introduction_str = data_introduction[:97] + '...' 
+ + citation_data = info_data['attrs']['citation'] citation_str = "" if citation_data and len(citation_data) > 0: citation_str = citation_data.strip("```").replace('\r', '').replace('\n', '') + similar_data_list = info_data['similar'] similar_ds_str = "" if similar_data_list and len(similar_data_list) > 0: similar_ds_str = ", ".join([x['name'] for x in similar_data_list]) - info_data = { + info_data_result = { 'Name': info_data['name'], - 'File Bytes': str(bytes2human(info_data['fileBytes'])), - 'File Count': str(info_data['fileCount']), + 'File Bytes': str(bytes2human(info_data['attrs']['fileBytes'])), + 'File Count': str(info_data['attrs']['fileCount']), 'Introduction': introduction_str, - 'Issue Time': info_data['publishDate'], + 'Issue Time': info_data['attrs']['publishDate'], 'License': license_str, 'Author': publisher_str, 'Data Type': media_types_str, 'Label Type': label_types_str, 'Task Type': task_types_str, 'Tags': tags_str, - 'HomePage': info_data['publishUrl'], + 'HomePage': info_data['attrs']['publishUrl'], 'Citation': citation_str, 'Similar Datasets': similar_ds_str, } + return info_data_result + + +@exception_handler +def implement_info(obj: ContextInfo, dataset: str) -> None: + """ + implement for displaying dataset info + Args: + obj (ContextInfo): context object + dataset (str): dataset name + + Returns: + + """ + client = obj.get_client() + odl_api = client.get_api() + info_data = odl_api.get_info(dataset) + + info_data_result = reformat_info_data(info_data) console = Console() table = Table(show_header=True, header_style='bold cyan', box=box.ASCII2) table.add_column("Field", width=20, justify='full', overflow='fold') table.add_column("Content", width=120, justify='full', overflow='fold') - for key in info_data.keys(): - val = info_data[key] + for key in info_data_result.keys(): + val = info_data_result[key] val = "" if not val else val table.add_row(key, val, end_section=True) diff --git a/opendatalab/cli/login.py b/opendatalab/cli/login.py 
index b61c570..0852374 100644 --- a/opendatalab/cli/login.py +++ b/opendatalab/cli/login.py @@ -10,7 +10,6 @@ def implement_login(obj: ContextInfo, username: str, password: str) -> None: try: client = obj.get_client() odl_api = client.get_api() - # config_json = odl_api.login(username=username, password=password) config_json = odl_api.odl_auth(account=username, password=password) obj.update_config(config_json) diff --git a/opendatalab/cli/logout.py b/opendatalab/cli/logout.py index b6557bf..ed756d8 100644 --- a/opendatalab/cli/logout.py +++ b/opendatalab/cli/logout.py @@ -7,7 +7,7 @@ @exception_handler def implement_logout(obj: ContextInfo) -> None: - + ##TODO: add /api/users/sync/logout config_content = obj.get_config_content() username = "" if 'user.email' in config_content.keys(): diff --git a/opendatalab/cli/search.py b/opendatalab/cli/search.py index fd22a3a..c930595 100644 --- a/opendatalab/cli/search.py +++ b/opendatalab/cli/search.py @@ -66,18 +66,33 @@ def implement_search(obj: ContextInfo, keywords: str) -> None: for _, res in enumerate(result_list): ds_name = res['name'] ds_name_rich = rich_content_str(keywords=keywords, content=ds_name) - ds_data_types = ','.join([dmt['name'] for dmt in res['mediaTypes']]) - ds_file_byte = bytes2human(res['fileBytes']) - ds_file_count = res['fileCount'] - ds_task_types = ','.join([dtt['name'] for dtt in res['taskTypes']]) - ds_task_types_rich = rich_content_str(keywords=keywords, content=ds_task_types) - ds_label_types = ','.join([dlt['name'] for dlt in res['labelTypes']]) - ds_label_types_rich = rich_content_str(keywords=keywords, content=ds_label_types) ds_view_count = res['viewCount'] - ds_desc = res['introductionText'][:97] + '...' + ds_desc = res['introduction']['en'][:97] + '...' 
ds_desc_rich = rich_content_str(keywords=keywords, content=ds_desc) + ds_attr_info = res['attrs'] + ds_file_byte = bytes2human(ds_attr_info['fileBytes']) + ds_file_count = ds_attr_info['fileCount'] + + ds_data_types = _get_complex_types_str(ds_attr_info, 'mediaTypes') + ds_task_types = _get_complex_types_str(ds_attr_info, 'taskTypes') + ds_label_types = _get_complex_types_str(ds_attr_info, 'labelTypes') + + ds_task_types_rich = rich_content_str(keywords=keywords, content=ds_task_types) + ds_label_types_rich = rich_content_str(keywords=keywords, content=ds_label_types) + table.add_row(ds_name_rich, ds_data_types, str(ds_file_byte), str(ds_file_count), ds_task_types_rich, ds_label_types_rich, str(ds_view_count), ds_desc_rich, end_section=True) - console.print(table) \ No newline at end of file + console.print(table) + + +def _get_complex_types_str(ds_attr_info, type_name): + if not (ds_attr_info or type_name): + return "" + + if type_name in ds_attr_info.keys(): + type_list = ds_attr_info[type_name] + return ','.join([d['name']['en'] for d in type_list]) + else: + return "" diff --git a/opendatalab/client/api.py b/opendatalab/client/api.py index 189e5fa..584857e 100644 --- a/opendatalab/client/api.py +++ b/opendatalab/client/api.py @@ -58,42 +58,19 @@ def get_dataset_sts(self, dataset, expires=900): # print(f"sts api, headers: {resp.headers}, text: {resp.text}") return resp.json()["data"] - @DeprecationWarning - def login(self, username: str, password: str): - data = { - "email": username, - "password": password, - } - data = json.dumps(data) - resp = requests.post( - f"{self.host}/api/users/login", - data=data, - headers={"Content-Type": "application/json"}, - ) - if resp.status_code != 200: - raise OdlAuthError(resp.status_code, resp.text) - - cookies_dict = requests.utils.dict_from_cookiejar(resp.cookies) - - if 'opendatalab_session' in cookies_dict.keys(): - opendatalab_session = cookies_dict['opendatalab_session'] - else: - raise 
OpenDataLabError(resp.status_code, "No opendatalab_session") - - config_json = { - 'user.email': username, - 'user.token': opendatalab_session, - } - - return config_json - def search_dataset(self, keywords): - resp = requests.get( - f"{self.host}/api/datasets/?pageSize=25&keywords={keywords}", + resp = requests.post( # f"{self.host}/api/datasets/?pageSize=25&keywords={keywords}", + f"{self.host}/api/datasets/list", headers={"X-OPENDATALAB-API-TOKEN": self.token, "Cookie": f"opendatalab_session={self.odl_cookie}", "User-Agent": f"opendatalab-python-sdk/{__version__}", }, + data=json.dumps({ + "backend": False, + "keywords": keywords, + "pageSize": 25, + "state": ["online"], + }) ) if resp.status_code != 200: print(f"{OpenDataLabError(resp.status_code, resp.text)}") @@ -124,7 +101,7 @@ def get_similar_dataset(self, dataset): def get_info(self, dataset): resp = requests.get( - f"{self.host}/api/datasets/{dataset}", + f"{self.host}/api/datasets/{dataset}?backend=false", headers={"X-OPENDATALAB-API-TOKEN": self.token, "Cookie": f"opendatalab_session={self.odl_cookie}", "User-Agent": f"opendatalab-python-sdk/{__version__}", diff --git a/opendatalab/exception.py b/opendatalab/exception.py index 8c7b434..f064ec0 100644 --- a/opendatalab/exception.py +++ b/opendatalab/exception.py @@ -27,7 +27,7 @@ class RespError(OpenDataLabError): """ STATUS_CODE: int - _INDENT = " " * len(__qualname__) + _INDENT = " " * len(__qualname__) # type: ignore def __init__(self, resp_code: Optional[int] = None, error_msg: str = ""): super().__init__(resp_code, error_msg) diff --git a/setup.cfg b/setup.cfg index 2412b11..de15d29 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,12 +4,12 @@ [metadata] name = opendatalab -url = https://github.com/opendatalab/opendatalab-python-sdk/-/tree/main -author = opendatalab -author_email = wangrui@pjlab.org.cn +url = https://github.com/opendatalab/opendatalab-python-sdk +author = OpenDataLab +author_email = OpenDataLab@pjlab.org.cn license = MIT license_file = 
LICENSE -keywords = opendatalab, dataset +keywords = opendatalab, dataset, test description = OpenDataLab Python SDK long_description = file: README.md long_description_content_type = text/markdown diff --git a/setup.py b/setup.py index 345b545..9e866e6 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ import os import setuptools -about = {} +about = {} # type: ignore here = os.path.abspath(os.path.dirname(__file__)) with open( os.path.join(here, "opendatalab", "__version__.py"), "r", encoding="utf-8" @@ -14,6 +14,6 @@ setuptools.setup( version=about["__version__"], project_urls={ - "Bug Tracker": "https://github.com/opendatalab/opendatalab-python-sdk/-/tree/main/-/issues", + "Bug Tracker": "https://github.com/opendatalab/opendatalab-python-sdk/issues", }, ) diff --git a/test-requirements.txt b/test-requirements.txt deleted file mode 100644 index e079f8a..0000000 --- a/test-requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pytest diff --git a/tests/demo.py b/tests/demo.py index 5cd0484..22efd72 100644 --- a/tests/demo.py +++ b/tests/demo.py @@ -18,17 +18,17 @@ odl_api = client.get_api() # 0. login with account - # account = "xxxxx" # your username - # pw = "xxxxx" # your password - # print(f'*****'*8) - # implement_login(ctx, account, pw) + account = "xxx@pjlab.org.cn" # your username + pw = "xxxx" # your password + print(f'*****'*8) + implement_login(ctx, account, pw) # 1. search demo - res_list = odl_api.search_dataset("coco") + res_list = odl_api.search_dataset("mnist") # for index, res in enumerate(res_list): # print(f"-->index: {index}, result: {res['name']}") - # implement_search("coco") + implement_search(ctx, "coco") print(f'*****'*8) # 2. list demo @@ -44,11 +44,12 @@ # 4. get dataset info implement_info(ctx, 'FB15k') + implement_info(ctx, 'COCO_1') # 5. 
download # get all files of dataset # implement_get(ctx, "MNIST", 4, 0) # get partial files of dataset - implement_get(ctx, "GOT-10k/data/test_data.zip", 4, 0) # 139, zip 1.16G GOT-10k + implement_get(ctx, "MNIST", 4, 0) print(f'*****' * 5) From a9bd621bc736cb5c70357ea1adce81f11abfcd2a Mon Sep 17 00:00:00 2001 From: Suven Date: Fri, 11 Nov 2022 11:10:07 +0800 Subject: [PATCH 02/29] update version to 0.0.3 --- README.md | 10 +++++----- tests/demo.py | 4 ++-- tests/uaa_test.py | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 79f83e9..f09ad82 100644 --- a/README.md +++ b/README.md @@ -64,11 +64,11 @@ Login with opendatalab username and password. If you haven't an opendatalab acco ```cmd $ odl login -Username []: wangrui@pjlab.org.cn +Username []: someone@example.com Password []: -Login successfully as wangrui@pjlab.org.cn +Login successfully as someone@example.com or -$ odl login -u wangrui@pjlab.org.cn +$ odl login -u someone@example.com Password[]: ``` @@ -77,7 +77,7 @@ Logout current opendatalab account ```cmd $ odl logout Do you want to logout? [y/N]: y -wangrui@pjlab.org.cn.com logout +someone@example.com logout ``` @@ -178,4 +178,4 @@ if __name__ == '__main__': ``` ## Documentation -More information can be found on the [documentation site](https://opendatalab.org.cn/docs) +More information can be found on the [documentation site](https://opendatalab.org.cn/docs) \ No newline at end of file diff --git a/tests/demo.py b/tests/demo.py index 22efd72..3d048b4 100644 --- a/tests/demo.py +++ b/tests/demo.py @@ -18,8 +18,8 @@ odl_api = client.get_api() # 0. 
login with account - account = "xxx@pjlab.org.cn" # your username - pw = "xxxx" # your password + account = "someone@example.com" # your username + pw = "password" # your password print(f'*****'*8) implement_login(ctx, account, pw) diff --git a/tests/uaa_test.py b/tests/uaa_test.py index e827c76..9f5e6d0 100644 --- a/tests/uaa_test.py +++ b/tests/uaa_test.py @@ -120,8 +120,8 @@ def get_auth_code(ssouid): def main(): - account = "191637988@qq.com" #"191637988@qq.com" "chenlu@pjlab.org.cn" - pw = "qq11111111" + account = "someone@example.com" + pw = "password" authorization = get_account(account=account, password=pw) sso_uid = get_user_info(authorization=authorization) From f1ab4ad8686e3e2f1573ff306925ea2268d3dfd9 Mon Sep 17 00:00:00 2001 From: weixingjian Date: Thu, 23 Feb 2023 16:21:45 +0800 Subject: [PATCH 03/29] fix malfunction of search method --- opendatalab/client/api.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/opendatalab/client/api.py b/opendatalab/client/api.py index 584857e..22f1ba1 100644 --- a/opendatalab/client/api.py +++ b/opendatalab/client/api.py @@ -64,6 +64,7 @@ def search_dataset(self, keywords): headers={"X-OPENDATALAB-API-TOKEN": self.token, "Cookie": f"opendatalab_session={self.odl_cookie}", "User-Agent": f"opendatalab-python-sdk/{__version__}", + "Content-Type": "application/json" }, data=json.dumps({ "backend": False, @@ -90,6 +91,7 @@ def get_similar_dataset(self, dataset): headers={"X-OPENDATALAB-API-TOKEN": self.token, "Cookie": f"opendatalab_session={self.odl_cookie}", "User-Agent": f"opendatalab-python-sdk/{__version__}", + "Content-Type": "application/json" }, ) if resp.status_code != 200: @@ -105,6 +107,7 @@ def get_info(self, dataset): headers={"X-OPENDATALAB-API-TOKEN": self.token, "Cookie": f"opendatalab_session={self.odl_cookie}", "User-Agent": f"opendatalab-python-sdk/{__version__}", + "Content-Type": "application/json" }, ) if resp.status_code != 200: @@ -128,6 +131,7 @@ def call_download_log(self, dataset, 
download_info): headers={"Content-Type": "application/json", "Cookie": f"opendatalab_session={self.odl_cookie}", "User-Agent": f"opendatalab-python-sdk/{__version__}", + "Content-Type": "application/json" }, ) @@ -141,6 +145,7 @@ def get_download_record(self, dataset): headers={"Content-Type": "application/json", "Cookie": f"opendatalab_session={self.odl_cookie}", "User-Agent": f"opendatalab-python-sdk/{__version__}", + "Content-Type": "application/json" }, ) @@ -169,6 +174,7 @@ def submit_download_record(self, dataset, download_data): headers={"Content-Type": "application/json", "Cookie": f"opendatalab_session={self.odl_cookie}", "User-Agent": f"opendatalab-python-sdk/{__version__}", + "Content-Type": "application/json" }, ) From d8b054f8fcb3d8db325ebe4abb4965bcaf8c3077 Mon Sep 17 00:00:00 2001 From: weixingjian Date: Mon, 27 Feb 2023 16:14:42 +0800 Subject: [PATCH 04/29] version number move to 0.0.4 --- opendatalab/__version__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opendatalab/__version__.py b/opendatalab/__version__.py index 6d1372b..d7a6289 100644 --- a/opendatalab/__version__.py +++ b/opendatalab/__version__.py @@ -8,7 +8,7 @@ """OpenDataLab python SDK version info.""" __url__ = "https://opendatalab.org.cn" -__version__ = "0.0.3" +__version__ = "0.0.4" __svc__ = '2.0' odl_clientId = "kmz3bkwzlaa3wrq8pvwa" -uaa_url_prefix = "https://sso.openxlab.org.cn/gw/uaa-be" +uaa_url_prefix = "https://sso.openxlab.org.cn/gw/uaa-be" \ No newline at end of file From 630a2851ef481f09606f98e97a2979cc4734dabd Mon Sep 17 00:00:00 2001 From: weixingjian Date: Thu, 2 Mar 2023 11:13:41 +0800 Subject: [PATCH 05/29] dev branch first commit, modified gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 4060180..60e3481 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ share/python-wheels/ .installed.cfg *.egg MANIFEST +ILSVRC2012_Images # PyInstaller # Usually these files are written by a 
python script from a template @@ -81,6 +82,7 @@ target/ # IPython profile_default/ ipython_config.py +tutorial.ipynb # PEP 582; used by e.g. github.com/David-OConnor/pyflow __pypackages__/ From 2eb620a1a4e981d2f4f11715e9dc666b053248c5 Mon Sep 17 00:00:00 2001 From: weixingjian Date: Tue, 7 Mar 2023 18:38:58 +0800 Subject: [PATCH 06/29] remove oss2 dependency, introduce parfive --- opendatalab/__init__.py | 5 +- opendatalab/__version__.py | 11 +- opendatalab/cli/cmd.py | 28 ++--- opendatalab/cli/get.py | 194 +++++++++------------------------ opendatalab/cli/login.py | 2 +- opendatalab/cli/ls.py | 35 +++--- opendatalab/cli/search.py | 8 +- opendatalab/cli/utility.py | 7 +- opendatalab/client/api.py | 74 ++++++++++++- opendatalab/client/client.py | 6 +- opendatalab/client/uaa.py | 11 +- opendatalab/dataset/dataset.py | 75 +------------ setup.cfg | 1 + setup.py | 1 + tests/demo.py | 12 +- tests/uaa_test.py | 29 ++--- 16 files changed, 197 insertions(+), 302 deletions(-) diff --git a/opendatalab/__init__.py b/opendatalab/__init__.py index bc338b4..4b545f1 100644 --- a/opendatalab/__init__.py +++ b/opendatalab/__init__.py @@ -6,8 +6,7 @@ """OpenDataLab python SDK.""" -from opendatalab.__version__ import __version__ -from opendatalab.__version__ import __url__ +from opendatalab.__version__ import __url__, __version__ from opendatalab.client.client import Client -__all__ = ["__url__", "__version__", "Client"] \ No newline at end of file +__all__ = ["__url__", "__version__", "Client"] diff --git a/opendatalab/__version__.py b/opendatalab/__version__.py index d7a6289..f38caa2 100644 --- a/opendatalab/__version__.py +++ b/opendatalab/__version__.py @@ -7,8 +7,13 @@ """OpenDataLab python SDK version info.""" -__url__ = "https://opendatalab.org.cn" +__url__ = 'https://opendatalab-ut.shlab.tech' __version__ = "0.0.4" __svc__ = '2.0' -odl_clientId = "kmz3bkwzlaa3wrq8pvwa" -uaa_url_prefix = "https://sso.openxlab.org.cn/gw/uaa-be" \ No newline at end of file +#odl_clientId = 
"kmz3bkwzlaa3wrq8pvwa" +# odl_clientId = "qja9jy5wnjyqwvylmeqw" +#uaa_url_prefix = "https://sso.openxlab.org.cn/gw/uaa-be" +uaa_url_prefix = "https://sso.staging.openxlab.org.cn/gw/uaa-be" +# baseUrl: https://sso.staging.openxlab.org.cn/gw/uaa-be +odl_clientId= 'ypkl8bwo0eb5ao1b96no' +# clientSecret: 97gdrvwwzob86q2rneq2x95w6bnxkpqj5oak1ype \ No newline at end of file diff --git a/opendatalab/cli/cmd.py b/opendatalab/cli/cmd.py index 6aa07c0..9126af3 100644 --- a/opendatalab/cli/cmd.py +++ b/opendatalab/cli/cmd.py @@ -8,7 +8,7 @@ import click -from opendatalab.__version__ import __version__, __url__, __svc__ +from opendatalab.__version__ import __svc__, __url__, __version__ from opendatalab.cli.custom import CustomCommand from opendatalab.cli.utility import ContextInfo @@ -92,8 +92,7 @@ def login(obj: ContextInfo, username: str, password: str): @command(synopsis=( - "$ odl ls dataset # list dataset files", - "$ odl ls dataset/sub_dir # list dataset/sub_dir files",)) + "$ odl ls dataset # list dataset files")) @click.argument("name", nargs=1) @click.pass_obj def ls(obj: ContextInfo, name: str) -> None: @@ -137,32 +136,23 @@ def info(obj: ContextInfo, name): @command(synopsis=("$ odl get dataset_name # get dataset files into local",)) @click.argument("name", nargs=1) @click.option( - "--thread", - "-t", - default=8, - help="Number of thread for download", - show_default=True, -) -@click.option( - "--limit_speed", - "-l", - default=0, - help="Download limit speed: KB/s, 0 is unlimited", + "--conn", + "-c", + default=5, + help="The number of parallel download slots", show_default=True, ) @click.pass_obj -def get(obj: ContextInfo, name, thread, limit_speed): +def get(obj: ContextInfo, name, conn = 5): """Get(Download) dataset files into local path.\f - Args: obj (ContextInfo): context info\f name (str): dataset name\f - thread (int): multi-thread number\f - limit_speed (int): limit download speed, for not limit set value to 0 + conn (int): The number of parallel 
download slots\f """ from opendatalab.cli.get import implement_get - implement_get(obj, name, thread, limit_speed) + implement_get(obj, name, conn) if __name__ == "__main__": diff --git a/opendatalab/cli/get.py b/opendatalab/cli/get.py index ede4fae..2cc8e19 100644 --- a/opendatalab/cli/get.py +++ b/opendatalab/cli/get.py @@ -12,16 +12,13 @@ from typing import List import click -import oss2 +import parfive from tqdm import tqdm -from opendatalab.cli.policy import service_agreement_url, private_policy_url +from opendatalab.cli.policy import private_policy_url, service_agreement_url from opendatalab.cli.utility import ContextInfo, exception_handler from opendatalab.exception import OdlDataNotExistsError -oss2.set_stream_logger(level=logging.CRITICAL) -key_to_get_size_map = {} - def handler(dwCtrlType): if dwCtrlType == 0: # CTRL_C_EVENT @@ -34,62 +31,8 @@ def handler(dwCtrlType): win32api.SetConsoleCtrlHandler(handler, True) -def get_oss_traffic_limit(limit_speed): - if limit_speed <= 0: - return 0 - if limit_speed < 245760: - return 245760 - if limit_speed > 838860800: - return 838860800 - return limit_speed - - -def download_object( - bucket: oss2.Bucket, - obj_key: str, - lock: threading.RLock, - root: str, - pbar: tqdm, - limit_speed: int, -): - def progress_callback(bytes_consumed, _): - with lock: - global key_to_get_size_map - if obj_key not in key_to_get_size_map: - key_to_get_size_map[obj_key] = 0 - - # sys.stdout.flush() - pbar.update(bytes_consumed - key_to_get_size_map[obj_key]) - key_to_get_size_map[obj_key] = bytes_consumed - - try: - headers = dict() - if limit_speed > 0: - headers[oss2.models.OSS_TRAFFIC_LIMIT] = str(limit_speed) - - filename = os.path.join(root, obj_key.split("/")[-1]) - - oss2.resumable_download( - bucket, - obj_key, - filename, - multiget_threshold=50 * 1024 * 1024, # 50M -> 500G(cdn) - part_size=10 * 1024 * 1024, # 10M - progress_callback=progress_callback, - num_threads=1, - headers=headers, - ) - return True, None - except 
oss2.exceptions.InconsistentError as e: - return False, e - except oss2.exceptions.ServerError as e: - return False, e - except Exception as e: - return False, e - - @exception_handler -def implement_get(obj: ContextInfo, name: str, thread: int, limit_speed: int, compressed: bool = True) -> None: +def implement_get(obj: ContextInfo, name: str, conn = 5): """ implementation for getting dataset files Args: @@ -103,59 +46,47 @@ def implement_get(obj: ContextInfo, name: str, thread: int, limit_speed: int, co ds_split = name.split("/") if len(ds_split) > 1: dataset_name = ds_split[0] - sub_dir = "/".join(ds_split[1:]) else: dataset_name = name - sub_dir = "" - + client = obj.get_client() - info_data_name = client.get_api().get_info(dataset_name)['name'] - dataset = client.get_dataset(info_data_name) - prefix = dataset.get_object_key_prefix(compressed) - bucket = dataset.get_oss_bucket() - - total_files, total_size = 0, 0 + data_info = client.get_api().get_info(dataset_name) + info_dataset_name = data_info['name'] + info_dataset_id = data_info['id'] + + dataset_res_dict = client.get_api().get_dataset_files(dataset_name=info_dataset_name) + # obj list constuct obj_info_list = [] - download_info_body = [] - - for info in oss2.ObjectIteratorV2(bucket, prefix): - if not info.is_prefix() and not info.key.endswith("/"): - file_name = "/".join(info.key.split("/")[2:]) - f_name = Path(file_name).name - if not sub_dir: - obj_info_list.append(info.key) - total_files = total_files + 1 - total_size = total_size + info.size - download_info_body.append({"name": f_name, "size": info.size}) - elif sub_dir and file_name.startswith(sub_dir): - obj_info_list.append(info.key) - total_files = total_files + 1 - total_size = total_size + info.size - download_info_body.append({"name": f_name, "size": info.size}) - else: - pass - - if len(download_info_body) == 0: - raise OdlDataNotExistsError(error_msg=f"{name} not exists!") - - client.get_api().call_download_log(dataset_name, 
download_info_body) - click.echo(f"Scan done, total files: {len(obj_info_list)}, total size: {tqdm.format_sizeof(total_size,divisor=1024)}") - - download_data = client.get_api().get_download_record(dataset_name) + for info in dataset_res_dict['list']: + curr_dict = {} + if not info['isDir']: + curr_dict['size'] = info['size'] + curr_dict['name'] = info['path'] + obj_info_list.append(curr_dict) + download_urls_list = client.get_api().get_dataset_download_urls( + dataset_id=info_dataset_id, + dataset_list=obj_info_list) + url_list = [] + item_list = [] + for item in download_urls_list: + url_list.append(item['url']) + item_list.append(item['name']) + + + local_dir = Path.cwd().joinpath(info_dataset_name) + + download_data = client.get_api().get_download_record(info_dataset_name) has_download = download_data['hasDownload'] if not has_download: if click.confirm(f"<>: {service_agreement_url}" f"\n<>: {private_policy_url}" f"\n[Warning]: Before downloading, please agree above content."): - client.get_api().submit_download_record(dataset_name, download_data) + client.get_api().submit_download_record(info_dataset_name, download_data) else: click.secho('bye~') sys.exit(1) - - limit_speed_per_thread = get_oss_traffic_limit(int(limit_speed * 1024 * 8 / thread)) - - local_dir = Path.cwd().joinpath(dataset_name) + if click.confirm(f"Download files into local directory: {local_dir} ?", default=True): if not Path(local_dir).exists(): Path(local_dir).mkdir(parents=True) @@ -163,46 +94,23 @@ def implement_get(obj: ContextInfo, name: str, thread: int, limit_speed: int, co else: click.secho('bye~') sys.exit(1) - - pbar = tqdm(total=total_size, unit="B", unit_divisor=1024, unit_scale=True, position=0) - - index = 0 - is_running = True - while is_running: - global key_to_get_size_map - bucket = dataset.refresh_oss_bucket() - error_object_list = get_objects_retry(bucket=bucket, - local_dir=local_dir, - obj_info_list=obj_info_list, - pbar=pbar, - 
limit_speed_per_thread=limit_speed_per_thread, - thread=thread) - index = index + 1 - time.sleep(1) - if len(error_object_list) > 0: - obj_info_list = error_object_list - is_running = True - continue - else: - is_running = False - break - - pbar.close() - print(f"{dataset_name} ,download completed!") - - -def get_objects_retry(bucket, local_dir, obj_info_list, pbar, limit_speed_per_thread, thread) -> List: - lock = threading.RLock() - error_object_list = [] - with ThreadPoolExecutor(max_workers=thread) as executor: - future_to_obj = {executor.submit( - download_object, bucket, obj, lock, local_dir, pbar, limit_speed_per_thread - ): obj for obj in obj_info_list} - - for future in as_completed(future_to_obj): - obj = future_to_obj[future] - success, _ = future.result() - if not success: - error_object_list.append(obj) - - return error_object_list + + downloader = parfive.Downloader(max_conn = conn, + max_splits= 5, + progress= True) + + for idx, url in enumerate(url_list): + downloader.enqueue_file(url, path = local_dir, filename=item_list[idx]) + + results = downloader.download() + + for i in results: + click.echo(i) + + err_str = '' + for err in results.errors: + err_str += f"{err.url} \t {err.exception}\n" + if not err_str: + print(f"{info_dataset_name}, download completed!") + else: + sys.exit(err_str) \ No newline at end of file diff --git a/opendatalab/cli/login.py b/opendatalab/cli/login.py index 0852374..1aee09a 100644 --- a/opendatalab/cli/login.py +++ b/opendatalab/cli/login.py @@ -2,6 +2,7 @@ # Copyright 2022 Shanghai AI Lab. Licensed under MIT License. 
# import sys + from opendatalab.cli.utility import ContextInfo, exception_handler @@ -18,4 +19,3 @@ def implement_login(obj: ContextInfo, username: str, password: str) -> None: sys.exit(-1) print(f"Login successfully as {username}") - diff --git a/opendatalab/cli/ls.py b/opendatalab/cli/ls.py index a0aa1e8..c5786a6 100644 --- a/opendatalab/cli/ls.py +++ b/opendatalab/cli/ls.py @@ -3,8 +3,8 @@ # import sys -import oss2 from rich import box +from rich import print as rprint from rich.console import Console from rich.table import Table @@ -14,7 +14,7 @@ @exception_handler -def implement_ls(obj: ContextInfo, dataset: str) -> None: +def implement_ls(obj: ContextInfo, dataset: str): """ implementation for show dataset files Args: @@ -33,27 +33,19 @@ def implement_ls(obj: ContextInfo, dataset: str) -> None: sub_dir = "" client = obj.get_client() - info_data_name = client.get_api().get_info(dataset_name)['name'] - dataset_instance = client.get_dataset(dataset_name=info_data_name) - - bucket = dataset_instance.get_oss_bucket() - prefix = dataset_instance.get_object_key_prefix(compressed=True) + info_dataset_name = client.get_api().get_info(dataset_name)['name'] + dataset_instance = client.get_dataset(dataset_name=info_dataset_name) + dataset_res_dict = client.get_api().get_dataset_files(dataset_name=info_dataset_name) + + # generate output info dict object_info_dict = {} total_files, total_size = 0, 0 - for info in oss2.ObjectIteratorV2(bucket, prefix): - if not info.is_prefix() and not info.key.endswith("/"): - file_name = "/".join(info.key.split("/")[2:]) - if not sub_dir: - object_info_dict[file_name] = bytes2human(info.size) - total_files = total_files + 1 - total_size = total_size + info.size - elif sub_dir and file_name.startswith(sub_dir): - object_info_dict[file_name] = bytes2human(info.size) - total_files = total_files + 1 - total_size = total_size + info.size - else: - pass + total_files = dataset_res_dict['total'] + for info in dataset_res_dict['list']: + 
object_info_dict[info['path']] = bytes2human(info['size']) + total_size += info['size'] + if len(object_info_dict) == 0: raise OdlAccessDeniedError() @@ -66,8 +58,7 @@ def implement_ls(obj: ContextInfo, dataset: str) -> None: table.add_column("File Name", min_width=20, justify='left') table.add_column("Size", width=12, justify='left') - print(f"total: {total_files}, size: {bytes2human(total_size)}") + print(f"Total file count: {total_files}, Size: {bytes2human(total_size)}") for key, val in sorted_object_info_dict.items(): table.add_row(key, val, end_section=True) - console.print(table) diff --git a/opendatalab/cli/search.py b/opendatalab/cli/search.py index c930595..1dbfed2 100644 --- a/opendatalab/cli/search.py +++ b/opendatalab/cli/search.py @@ -2,9 +2,12 @@ # Copyright 2022 Shanghai AI Lab. Licensed under MIT License. # import re +import time + from rich import box from rich.console import Console from rich.table import Table + from opendatalab.cli.utility import ContextInfo, exception_handler from opendatalab.utils import bytes2human @@ -48,8 +51,11 @@ def implement_search(obj: ContextInfo, keywords: str) -> None: """ client = obj.get_client() odl_api = client.get_api() + import time + time_start =time.time() result_list = odl_api.search_dataset(keywords) - + time_end = time.time() + print('-------------time_consuming--------', time_end - time_start, 's') console = Console() table = Table(show_header=True, header_style='bold cyan', box=box.ASCII2) table.add_column("Name", min_width=10, justify='left', overflow='fold') diff --git a/opendatalab/cli/utility.py b/opendatalab/cli/utility.py index 699d7c7..737b911 100644 --- a/opendatalab/cli/utility.py +++ b/opendatalab/cli/utility.py @@ -4,17 +4,18 @@ # """OpenDataLab CLI utility functions.""" -import sys import json +import sys from functools import wraps from typing import Any, Callable, TypeVar + import click -from opendatalab.__version__ import __version__ +from opendatalab.__version__ import __version__ from 
opendatalab.cli.config import config as client_config from opendatalab.client import Client -from opendatalab.utils import UUID from opendatalab.exception import OpenDataLabError +from opendatalab.utils import UUID _Callable = TypeVar("_Callable", bound=Callable[..., None]) diff --git a/opendatalab/client/api.py b/opendatalab/client/api.py index 22f1ba1..2b0a9e8 100644 --- a/opendatalab/client/api.py +++ b/opendatalab/client/api.py @@ -19,7 +19,78 @@ def __init__(self, host, token, odl_cookie): self.host = host self.token = token self.odl_cookie = odl_cookie + + def get_dataset_files(self, dataset_name:str): + """ https request retrieve dataset files + Args: + dataset (str): dataset name + + Returns: + result_dict: 2 keys: + dict['list']:contain list of files + dict['total']:files count. + """ + + header_dict = {"X-OPENDATALAB-API-TOKEN": self.token, + "Cookie": f"opendatalab_session={self.odl_cookie}", + "User-Agent": UUID, + "accept" : "application/json" + } + data = {"recursive": True} + resp = requests.get( + url = f"{self.host}/api/datasets/{dataset_name}/files", + params = data, + headers = header_dict + ) + if resp.status_code != 200: + if resp.status_code == 404: + raise OdlDataNotExistsError() + elif resp.status_code == 401: + raise OdlAuthError() + elif resp.status_code == 403: + raise OdlAccessDeniedError() + elif resp.status_code == 412: + raise OdlAccessCdnError() + elif resp.status_code == 500: + raise OdlAccessDeniedError() + else: + raise RespError(resp_code=resp.status_code, error_msg=resp.reason) + + result_dict = resp.json()['data'] + + return result_dict + + def get_dataset_download_urls(self, dataset_id:int, dataset_list:list): + """get Dataset segments downloadable url + Args: + dataset (str): dataset name + dataset_list (list): list of dict contain segment size and name + + Returns: + download_url_list: list of dict contain segment name and executable url. 
+ """ + resp = requests.post( + f"{self.host}/api/track/datasets/download/{dataset_id}", + data = json.dumps(dataset_list), + headers={ + "Content-Type": "application/json", + "Cookie": f"opendatalab_session={self.odl_cookie}", + "User-Agent": f"opendatalab-python-sdk/{__version__}", + "accept": "application/json" + } + ) + if resp.status_code != 200: + print(f"{OpenDataLabError(resp.status_code, resp.text)}") + sys.exit(-1) + + download_url_list = resp.json()['data'] + if not download_url_list: + click.secho(f"No datasets matched!", fg='red') + sys.exit(-1) + + return download_url_list + def get_dataset_sts(self, dataset, expires=900): """Get dataset sts by dataset_name Args: @@ -183,6 +254,7 @@ def submit_download_record(self, dataset, download_data): def odl_auth(self, account, password): code = get_odl_token(account, password) + print(code) data = { "code": code, "redirect": "", @@ -194,7 +266,7 @@ def odl_auth(self, account, password): data=data, headers={"Content-Type": "application/json"}, ) - + print(resp.status_code) if resp.status_code != 200: raise OdlAuthError(resp.status_code, resp.text) diff --git a/opendatalab/client/client.py b/opendatalab/client/client.py index cf4a3f3..8b6db9a 100644 --- a/opendatalab/client/client.py +++ b/opendatalab/client/client.py @@ -2,10 +2,10 @@ # # Copyright 2022 Shanghai AI Lab. Licensed under MIT License. 
# +from opendatalab.__version__ import __url__ from opendatalab.client.api import OpenDataLabAPI from opendatalab.dataset.dataset import Dataset from opendatalab.utils import get_api_token_from_env -from opendatalab.__version__ import __url__ class Client: @@ -31,10 +31,6 @@ def get_dataset(self, dataset_name: str) -> Dataset: f"{self.host}/datasets/{dataset_name}", self.token, self.odl_cookie) return self.dataset_map[dataset_name] - def get(self, dataset_name: int, filepath: str): - dataset = self.get_dataset(dataset_name) - return dataset.get(filepath) - def get_api(self): self.odl_api = OpenDataLabAPI(self.host, self.token, self.odl_cookie) return self.odl_api diff --git a/opendatalab/client/uaa.py b/opendatalab/client/uaa.py index be720e5..00f90f8 100644 --- a/opendatalab/client/uaa.py +++ b/opendatalab/client/uaa.py @@ -1,14 +1,14 @@ +import json import sys +import time +from base64 import b64decode, b64encode import click import requests -import json -import time -from Crypto.PublicKey import RSA from Crypto.Cipher import PKCS1_v1_5 -from base64 import b64encode, b64decode +from Crypto.PublicKey import RSA -from opendatalab.__version__ import uaa_url_prefix, odl_clientId +from opendatalab.__version__ import odl_clientId, uaa_url_prefix api_login = "/api/v1/login/byClientSdk" api_public_key = "/api/v1/cipher/getPubKey" @@ -121,6 +121,7 @@ def get_odl_token(account, password): auth_code = get_auth_code(sso_uid=sso_uid) if not auth_code: + print(auth_code) click.secho(f"Error: Auth failure with account: {account}", err=True, fg="red") sys.exit(1) diff --git a/opendatalab/dataset/dataset.py b/opendatalab/dataset/dataset.py index 22c9463..7630577 100644 --- a/opendatalab/dataset/dataset.py +++ b/opendatalab/dataset/dataset.py @@ -6,13 +6,12 @@ import sys import click -import oss2 import requests from requests.adapters import HTTPAdapter from opendatalab.client.api import OpenDataLabAPI from opendatalab.exception import OpenDataLabError -from opendatalab.utils 
import parse_url, get_api_token_from_env +from opendatalab.utils import get_api_token_from_env, parse_url class Dataset: @@ -25,74 +24,4 @@ def __init__(self, url: str, token: str = "", odl_cookie: str = "") -> None: self.oss_bucket = None self.bucket_name = None - self.oss_path_prefix = "" - self.init_oss_bucket() - - def get(self, filepath: str, compressed: bool = True): - object_key = self.get_object_key_prefix(compressed) + filepath - try: - return self.oss_bucket.get_object(object_key) - except oss2.exceptions.ServerError as e: - if "InvalidAccessKeyId" not in str(e): - raise e - - self.init_oss_bucket() - return self.oss_bucket.get_object(object_key) - - def init_oss_bucket(self, expires=3600): - sts = self.open_data_lab_api.get_dataset_sts(self.dataset_name, expires=expires) - - if sts: - path_info = sts["path"].replace("oss://", "").split("/") - bucket_name = path_info[0] - sts_point, sts_use_cname = self.select_endpoint(sts) - - if sts_point: - auth = oss2.StsAuth(sts["accessKeyId"], sts["accessKeySecret"], sts["securityToken"]) - self.oss_bucket = oss2.Bucket(auth, sts_point, bucket_name, is_cname=sts_use_cname) - self.oss_path_prefix = "/".join(path_info[1:]) - else: - raise OpenDataLabError(1001, "access to bucket error") - - def get_oss_bucket(self) -> oss2.Bucket: - if self.oss_bucket is None: - self.init_oss_bucket() - return self.oss_bucket - - def refresh_oss_bucket(self) -> oss2.Bucket: - self.init_oss_bucket() - return self.get_oss_bucket() - - def get_object_key_prefix(self, compressed: bool = True) -> str: - if compressed: - return f"{self.oss_path_prefix}/raw/" - else: - return f"{self.oss_path_prefix}/" - - @classmethod - def select_endpoint(cls, sts): - s = requests.Session() - sts_endpoints = sts["endpoints"] - path_info = sts["path"].replace("oss://", "").split("/") - bucket_name = path_info[0] - - # use general endpoint - if len(sts_endpoints) > 0: - endpoint = sts_endpoints[-1] - sts_endpoint = endpoint['url'] - sts_use_cname = 
endpoint['useCname']
-
-            url_splitter = "://"
-            url_split_arr = str(sts_endpoint).split(url_splitter)
-            url_prefix = url_split_arr[0]
-            url_body = url_split_arr[1]
-            check_url = url_prefix + url_splitter + bucket_name + "." + url_body + "/check_connected"
-            s.mount(check_url, HTTPAdapter(max_retries=0))
-
-            try:
-                resp = s.get(check_url, timeout=(3, 1))  # 0.5
-                if resp.status_code == http.HTTPStatus.OK:
-                    return sts_endpoint, sts_use_cname
-            except Exception as e:
-                click.secho(f"ConnectionError occurs, please check network!", fg='red')
-                sys.exit(-1)
\ No newline at end of file
+        self.oss_path_prefix = ""
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
index de15d29..872d9ab 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -36,6 +36,7 @@ install_requires =
    requests >= 2.4.2
    tqdm >= 4.14.0
    oss2
+    parfive
    colorama
    rich
    pywin32; platform_system == "Windows"
diff --git a/setup.py b/setup.py
index 9e866e6..d23e84f 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,5 @@
 import os
+
 import setuptools
 
 about = {}  # type: ignore
diff --git a/tests/demo.py b/tests/demo.py
index 3d048b4..ab14338 100644
--- a/tests/demo.py
+++ b/tests/demo.py
@@ -18,8 +18,8 @@
     odl_api = client.get_api()
 
     # 0. login with account
-    account = "someone@example.com"  # your username
-    pw = "password"  # your password
+    account = "your-username"  # your username -- never commit real credentials
+    pw = "your-password"  # your password -- never commit real credentials
     print(f'*****'*8)
     implement_login(ctx, account, pw)
 
@@ -35,12 +35,6 @@
     implement_ls(ctx, 'TAO')
     print(f'*****' * 8)
 
-    # 3. read file online demo
-    dataset = client.get_dataset('FB15k')
-    with dataset.get('meta/info.json', compressed=False) as fd:
-        content = json.load(fd)
-        print(f"{content}")
-    print(f'*****'*8)
 
     # 4.
 get dataset info
     implement_info(ctx, 'FB15k')
 
@@ -51,5 +45,5 @@
     # implement_get(ctx, "MNIST", 4, 0)
 
     # get partial files of dataset
-    implement_get(ctx, "MNIST", 4, 0)
+    implement_get(ctx, "MNIST")
     print(f'*****' * 5)
diff --git a/tests/uaa_test.py b/tests/uaa_test.py
index 9f5e6d0..ce416a5 100644
--- a/tests/uaa_test.py
+++ b/tests/uaa_test.py
@@ -1,14 +1,16 @@
-import requests
 import json
 import time
-from Crypto.PublicKey import RSA
+from base64 import b64decode, b64encode
+
+import requests
 from Crypto.Cipher import PKCS1_v1_5
-from base64 import b64encode, b64decode
+from Crypto.PublicKey import RSA
 
-odl_dev_clientId = "qja9jy5wnjyqwvylmeqw"
+odl_dev_clientId = "ypkl8bwo0eb5ao1b96no"
 odl_prd_clientId = "kmz3bkwzlaa3wrq8pvwa"
-uaa_dev_url_prefix = "https://uaa-dev.openmmlab.com/gw/uaa-be"
+
+uaa_dev_url_prefix = "https://sso.staging.openxlab.org.cn/gw/uaa-be"
 uaa_prd_url_prefix = "https://sso.openxlab.org.cn/gw/uaa-be"
 
 api_login = "/api/v1/login/byAccount"
@@ -39,7 +41,7 @@ def get_public_key():
     result = ""
     if resp.status_code == 200:
         result = resp.json()['data']['pubKey']
-        # print(result)
+        print(result)
 
     return result
 
@@ -75,7 +77,7 @@ def get_account(account ,password):
     authorization = None
     if resp.status_code == 200:
         result = resp.json()['data']
-        # print(result)
+        print(result)
 
         if result:
             authorization = resp.headers['authorization']
@@ -95,7 +97,7 @@ def get_user_info(authorization):
 
     if resp.status_code == 200:
         result = resp.json()['data']['ssoUid']
-        # print(result)
+        print(result)
 
     return result
 
@@ -114,14 +116,14 @@ def get_auth_code(ssouid):
     )
     if resp.status_code == 200:
         result = resp.json()['data']
-        # print(result)
+        print(result)
 
     return result
 
 
 def main():
-    account = "someone@example.com"
-    pw = "password"
+    account = "your-username"
+    pw = "your-password"
authorization = get_account(account=account, password=pw) sso_uid = get_user_info(authorization=authorization) @@ -129,6 +131,5 @@ def main(): if __name__ == "__main__": - main() - - + main() + # print('!!!') \ No newline at end of file From 22e9ef62beffee9b2726b66334dc72f8c6c4a3fb Mon Sep 17 00:00:00 2001 From: weixingjian Date: Wed, 8 Mar 2023 18:10:56 +0800 Subject: [PATCH 07/29] remove parfive dependency, introduce request based resumable download(demo single file) --- opendatalab/cli/get.py | 103 +++++++++++++++++++++++++++++++------- opendatalab/cli/ls.py | 2 +- opendatalab/client/api.py | 5 +- 3 files changed, 90 insertions(+), 20 deletions(-) diff --git a/opendatalab/cli/get.py b/opendatalab/cli/get.py index 2cc8e19..31a525b 100644 --- a/opendatalab/cli/get.py +++ b/opendatalab/cli/get.py @@ -13,6 +13,7 @@ import click import parfive +import requests from tqdm import tqdm from opendatalab.cli.policy import private_policy_url, service_agreement_url @@ -30,7 +31,52 @@ def handler(dwCtrlType): import win32api win32api.SetConsoleCtrlHandler(handler, True) +@exception_handler +def download_from_url(url:str, pth: str, file_name:str): + """This function perform a resumable download for a single object + Args: + url (str): single download url + pth(str): local download path + file_name (str): file name(may contain relative path) + """ + response = requests.get(url, stream = True) + + # get total file size + file_size = int(response.headers['content-length']) + + target = os.path.join(pth, file_name) + # indicate a file-downloaing not complete + if os.path.exists(target): + first_byte = os.path.getsize(target) + else: + # indicate a new file + first_byte = 0 + + # check actual size and server size + if first_byte >= file_size: + click.secho('Download Complete') + sys.exit(1) + + header = {"Range": f"bytes = {first_byte}-{file_size}"} + + pbar = tqdm(total=file_size, + initial= first_byte, + unit = 'B', + unit_scale= True, + desc = 'Downloading Progress:') + + req = 
requests.get(url, headers= header, stream=True) + + with(open(target, 'ab')) as f: + for chunk in req.iter_content(chunk_size=1024): + if chunk: + f.write(chunk) + pbar.update(1024) + pbar.close() + return file_size + + @exception_handler def implement_get(obj: ContextInfo, name: str, conn = 5): """ @@ -39,22 +85,28 @@ def implement_get(obj: ContextInfo, name: str, conn = 5): obj (ContextInfo): name (str): thread (int): - limit_speed (int): compressed (bool): Returns: """ ds_split = name.split("/") if len(ds_split) > 1: dataset_name = ds_split[0] + sub_dir = "/".join(ds_split[1:]) else: dataset_name = name + sub_dir = "" + + # print(name, ds_split ,dataset_name) client = obj.get_client() data_info = client.get_api().get_info(dataset_name) info_dataset_name = data_info['name'] info_dataset_id = data_info['id'] - dataset_res_dict = client.get_api().get_dataset_files(dataset_name=info_dataset_name) + dataset_res_dict = client.get_api().get_dataset_files(dataset_name=info_dataset_name, + prefix = sub_dir) + # print(dataset_res_dict) + # obj list constuct obj_info_list = [] for info in dataset_res_dict['list']: @@ -63,15 +115,25 @@ def implement_get(obj: ContextInfo, name: str, conn = 5): curr_dict['size'] = info['size'] curr_dict['name'] = info['path'] obj_info_list.append(curr_dict) + + # if not sub_dir: + print(obj_info_list, sub_dir) download_urls_list = client.get_api().get_dataset_download_urls( dataset_id=info_dataset_id, dataset_list=obj_info_list) + # print(obj_info_list) + print('____________________________________________________-') + + url_list = [] item_list = [] for item in download_urls_list: url_list.append(item['url']) item_list.append(item['name']) + print(url_list[0], item_list[0]) + + local_dir = Path.cwd().joinpath(info_dataset_name) @@ -95,22 +157,29 @@ def implement_get(obj: ContextInfo, name: str, conn = 5): click.secho('bye~') sys.exit(1) - downloader = parfive.Downloader(max_conn = conn, - max_splits= 5, - progress= True) + + 
######################################################################## + size = download_from_url(url_list[0], pth=local_dir, file_name = item_list[0]) + ######################################################################## + print(size) - for idx, url in enumerate(url_list): - downloader.enqueue_file(url, path = local_dir, filename=item_list[idx]) + + # downloader = parfive.Downloader(max_conn = conn, + # max_splits= 5, + # progress= True) + + # for idx, url in enumerate(url_list): + # downloader.enqueue_file(url, path = local_dir, filename=item_list[idx]) - results = downloader.download() + # results = downloader.download() - for i in results: - click.echo(i) + # for i in results: + # click.echo(i) - err_str = '' - for err in results.errors: - err_str += f"{err.url} \t {err.exception}\n" - if not err_str: - print(f"{info_dataset_name}, download completed!") - else: - sys.exit(err_str) \ No newline at end of file + # err_str = '' + # for err in results.errors: + # err_str += f"{err.url} \t {err.exception}\n" + # if not err_str: + # print(f"{info_dataset_name}, download completed!") + # else: + # sys.exit(err_str) \ No newline at end of file diff --git a/opendatalab/cli/ls.py b/opendatalab/cli/ls.py index c5786a6..4c249e1 100644 --- a/opendatalab/cli/ls.py +++ b/opendatalab/cli/ls.py @@ -36,7 +36,7 @@ def implement_ls(obj: ContextInfo, dataset: str): info_dataset_name = client.get_api().get_info(dataset_name)['name'] dataset_instance = client.get_dataset(dataset_name=info_dataset_name) - dataset_res_dict = client.get_api().get_dataset_files(dataset_name=info_dataset_name) + dataset_res_dict = client.get_api().get_dataset_files(dataset_name=info_dataset_name, prefix = sub_dir) # generate output info dict object_info_dict = {} diff --git a/opendatalab/client/api.py b/opendatalab/client/api.py index 2b0a9e8..2d94493 100644 --- a/opendatalab/client/api.py +++ b/opendatalab/client/api.py @@ -20,7 +20,7 @@ def __init__(self, host, token, odl_cookie): self.token = 
token self.odl_cookie = odl_cookie - def get_dataset_files(self, dataset_name:str): + def get_dataset_files(self, dataset_name:str, prefix:str): """ https request retrieve dataset files Args: dataset (str): dataset name @@ -36,7 +36,8 @@ def get_dataset_files(self, dataset_name:str): "User-Agent": UUID, "accept" : "application/json" } - data = {"recursive": True} + data = {"recursive": True, + "prefix":prefix} resp = requests.get( url = f"{self.host}/api/datasets/{dataset_name}/files", params = data, From 3703cfc2b02fd0e2d13bb465c6384d021af5a5fb Mon Sep 17 00:00:00 2001 From: weixingjian Date: Fri, 10 Mar 2023 17:18:06 +0800 Subject: [PATCH 08/29] remove dependency of parfive, introduce brand new multi-threading & resumable downloader --- opendatalab/cli/cmd.py | 27 ++- opendatalab/cli/get.py | 118 +++-------- opendatalab/client/downloader.py | 350 +++++++++++++++++++++++++++++++ 3 files changed, 397 insertions(+), 98 deletions(-) create mode 100644 opendatalab/client/downloader.py diff --git a/opendatalab/cli/cmd.py b/opendatalab/cli/cmd.py index 9126af3..d7b92ea 100644 --- a/opendatalab/cli/cmd.py +++ b/opendatalab/cli/cmd.py @@ -136,24 +136,29 @@ def info(obj: ContextInfo, name): @command(synopsis=("$ odl get dataset_name # get dataset files into local",)) @click.argument("name", nargs=1) @click.option( - "--conn", - "-c", - default=5, - help="The number of parallel download slots", - show_default=True, + "--dest", + "-d", + default='', + help="Desired dataset store path", + show_default=True +) +@click.option( + "--workers", + "-w", + default = 8, + help= "number of workers", + show_default = True ) @click.pass_obj -def get(obj: ContextInfo, name, conn = 5): +def get(obj: ContextInfo, name, dest, workers): """Get(Download) dataset files into local path.\f Args: obj (ContextInfo): context info\f name (str): dataset name\f - conn (int): The number of parallel download slots\f + destination(str): desired dataset store path\f """ - + from opendatalab.cli.get import 
implement_get - implement_get(obj, name, conn) - - + implement_get(obj, name, dest, workers) if __name__ == "__main__": cli() diff --git a/opendatalab/cli/get.py b/opendatalab/cli/get.py index 31a525b..add0756 100644 --- a/opendatalab/cli/get.py +++ b/opendatalab/cli/get.py @@ -2,22 +2,17 @@ # # Copyright 2022 Shanghai AI Lab. Licensed under MIT License. # -import logging import os import sys -import threading -import time -from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path from typing import List import click -import parfive -import requests from tqdm import tqdm from opendatalab.cli.policy import private_policy_url, service_agreement_url from opendatalab.cli.utility import ContextInfo, exception_handler +from opendatalab.client import downloader from opendatalab.exception import OdlDataNotExistsError @@ -30,55 +25,10 @@ def handler(dwCtrlType): if sys.platform == "win32": import win32api win32api.SetConsoleCtrlHandler(handler, True) - -@exception_handler -def download_from_url(url:str, pth: str, file_name:str): - """This function perform a resumable download for a single object - - Args: - url (str): single download url - pth(str): local download path - file_name (str): file name(may contain relative path) - """ - response = requests.get(url, stream = True) - - # get total file size - file_size = int(response.headers['content-length']) - - target = os.path.join(pth, file_name) - # indicate a file-downloaing not complete - if os.path.exists(target): - first_byte = os.path.getsize(target) - else: - # indicate a new file - first_byte = 0 - - # check actual size and server size - if first_byte >= file_size: - click.secho('Download Complete') - sys.exit(1) - - header = {"Range": f"bytes = {first_byte}-{file_size}"} - - pbar = tqdm(total=file_size, - initial= first_byte, - unit = 'B', - unit_scale= True, - desc = 'Downloading Progress:') - - req = requests.get(url, headers= header, stream=True) - - with(open(target, 'ab')) as f: - 
for chunk in req.iter_content(chunk_size=1024): - if chunk: - f.write(chunk) - pbar.update(1024) - pbar.close() - return file_size @exception_handler -def implement_get(obj: ContextInfo, name: str, conn = 5): +def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int): """ implementation for getting dataset files Args: @@ -96,7 +46,7 @@ def implement_get(obj: ContextInfo, name: str, conn = 5): dataset_name = name sub_dir = "" - # print(name, ds_split ,dataset_name) + # print(name, ds_split ,dataset_name, sub_dir) client = obj.get_client() data_info = client.get_api().get_info(dataset_name) @@ -113,16 +63,16 @@ def implement_get(obj: ContextInfo, name: str, conn = 5): curr_dict = {} if not info['isDir']: curr_dict['size'] = info['size'] - curr_dict['name'] = info['path'] + curr_dict['name'] = os.path.join(sub_dir,info['path']) obj_info_list.append(curr_dict) # if not sub_dir: - print(obj_info_list, sub_dir) + # print(obj_info_list, sub_dir) download_urls_list = client.get_api().get_dataset_download_urls( dataset_id=info_dataset_id, dataset_list=obj_info_list) # print(obj_info_list) - print('____________________________________________________-') + print('___________________________________________________') url_list = [] @@ -131,11 +81,9 @@ def implement_get(obj: ContextInfo, name: str, conn = 5): url_list.append(item['url']) item_list.append(item['name']) - print(url_list[0], item_list[0]) + # print(url_list[0], item_list[0]) - - - local_dir = Path.cwd().joinpath(info_dataset_name) + local_dir = destination download_data = client.get_api().get_download_record(info_dataset_name) has_download = download_data['hasDownload'] @@ -146,7 +94,7 @@ def implement_get(obj: ContextInfo, name: str, conn = 5): f"\n[Warning]: Before downloading, please agree above content."): client.get_api().submit_download_record(info_dataset_name, download_data) else: - click.secho('bye~') + click.secho('See you next time~!') sys.exit(1) if click.confirm(f"Download files 
into local directory: {local_dir} ?", default=True): @@ -154,32 +102,28 @@ def implement_get(obj: ContextInfo, name: str, conn = 5): Path(local_dir).mkdir(parents=True) print(f"create local dir: {local_dir}") else: - click.secho('bye~') + click.secho('See you next time~!') sys.exit(1) - + # print(url_list[0], item_list[0]) ######################################################################## - size = download_from_url(url_list[0], pth=local_dir, file_name = item_list[0]) - ######################################################################## - print(size) - - - # downloader = parfive.Downloader(max_conn = conn, - # max_splits= 5, - # progress= True) - - # for idx, url in enumerate(url_list): - # downloader.enqueue_file(url, path = local_dir, filename=item_list[idx]) - - # results = downloader.download() - - # for i in results: - # click.echo(i) - - # err_str = '' - # for err in results.errors: - # err_str += f"{err.url} \t {err.exception}\n" - # if not err_str: - # print(f"{info_dataset_name}, download completed!") - # else: - # sys.exit(err_str) \ No newline at end of file + + with tqdm(total = len(url_list)) as pbar: + for idx in range(len(url_list)): + if len(item_list[idx].split('/')) == 1: + filename = item_list[idx] + prefix = '' + else: + filename = item_list[idx].split('/')[-1] + prefix = os.path.dirname(item_list[idx]) + + click.echo(f"Downloading No.{idx+1} of total {len(url_list)} files\n") + if os.path.exists(os.path.join(destination,info_dataset_name, prefix,filename)): + click.echo('target already exists, jumping to next!') + continue + downloader.Downloader(url = url_list[idx], + filename=item_list[idx], + download_dir = os.path.join(destination, info_dataset_name), + blocks_num= num_workers).start() + pbar.update(1) + click.echo(f"\nDownload Complete!") diff --git a/opendatalab/client/downloader.py b/opendatalab/client/downloader.py new file mode 100644 index 0000000..adf2498 --- /dev/null +++ b/opendatalab/client/downloader.py @@ -0,0 +1,350 
@@ +# -*- coding: utf-8 -*- +import glob +import os +import sys +import threading +import time + +import requests + + +class Worker: + def __init__(self, name: str, url: str, range_start, range_end, cache_dir, finish_callback): + self.name = name + self.url = url + self.cache_filename = os.path.join(cache_dir, name + ".odl") + self.range_start = range_start # fixed + self.range_end = range_end # fixed + self.range_curser = range_start # curser dynamic + self.finish_callback = finish_callback + self.terminate_flag = False + self.FINISH_TYPE = "" # DONE\HELP\RETIRE + + def __run(self): + chunk_size = 1 * 1024 + header = { + 'Range': f'Bytes={self.range_curser}-{self.range_end}', + } + req = requests.get(self.url, stream=True, headers=header) + + if 200 <= req.status_code <= 299: + with open(self.cache_filename, "wb") as cache: + for chunk in req.iter_content(chunk_size=chunk_size): + if self.terminate_flag: + break + cache.write(chunk) + self.range_curser += len(chunk) + if not self.terminate_flag: + self.FINISH_TYPE = "DONE" + req.close() + self.finish_callback(self) + + def start(self): + threading.Thread(target=self.__run).start() + + def help(self): + self.FINISH_TYPE = "HELP" + self.terminate_flag = True + + def retire(self): + self.FINISH_TYPE = "RETIRE" + self.terminate_flag = True + + def __lt__(self, another): + return self.range_start < another.range_start + + def get_progress(self): + """progress for each worker""" + _progress = { + "curser": self.range_curser, + "start": self.range_start, + "end": self.range_end + } + return _progress + + +class Downloader: + def __init__(self, url: str, filename:str, download_dir: str, blocks_num: int = 8): + assert 0 <= blocks_num <= 32 + self.prefix_flag = False + if len(filename.split('/')) == 1: + self.filename = filename + self.prefix = '' + else: + self.filename = filename.split('/')[-1] + self.prefix_flag = True + self.prefix = os.path.dirname(filename) + self.url = url + self.download_dir = download_dir + if 
os.path.exists(os.path.join(self.download_dir, self.prefix, self.filename)): + self.__whistleblower('target already exists') + self.stop() + + # self.download_dir = os.path.join(download_dir, f".{os.sep}odl{os.sep}") + self.blocks_num = blocks_num + self.file_size = self.__get_size() + + # make download dir + if not os.path.exists(self.download_dir): + os.makedirs(self.download_dir) + + # make cache dir + if self.prefix_flag: + self.cache_dir = os.path.join(self.download_dir,self.prefix,'.cache/') + else: + self.cache_dir = os.path.join(self.download_dir,'.cache/') + if not os.path.exists(self.cache_dir): + os.makedirs(self.cache_dir) + + # slicing + self.start_since = time.time() + # worker container + self.workers = [] + self.LOG = self.__get_log_from_cache() + self.__done = threading.Event() + self.__download_record = [] + threading.Thread(target=self.__supervise).start() + # main + self.__main_thread_done = threading.Event() + # + readable_size = self.__get_readable_size(self.file_size) + pathfilename = os.path.join(self.download_dir, self.prefix,self.filename) + + def __get_size(self): + try: + req = requests.head(self.url) + content_length = req.headers["Content-Length"] + req.close() + return int(content_length) + except Exception as err: + self.__whistleblower(f"[Error] {err}") + return 0 + + def __get_readable_size(self, size): + units = ["B", "KB", "MB", "GB", "TB", "PB"] + unit_index = 0 + K = 1024.0 + while size >= K: + size = size / K + unit_index += 1 + return "%.1f %s" % (size, units[unit_index]) + + def __get_cache_filenames(self): + return glob.glob(f"{self.cache_dir}{self.filename}.*.odl") + + def __get_ranges_from_cache(self): + # like ./cache/filename.1120.odl + ranges = [] + for filename in self.__get_cache_filenames(): + size = os.path.getsize(filename) + if size > 0: + cache_start = int(filename.split(".")[-2]) + cache_end = cache_start + size - 1 + ranges.append((cache_start, cache_end)) + ranges.sort(key=lambda x: x[0]) + return ranges + + 
def __get_log_from_cache(self): + ranges = self.__get_ranges_from_cache() + LOG = [] + if len(ranges) == 0: + LOG.append((0, self.file_size - 1)) + else: + for i, (start, end) in enumerate(ranges): + if i == 0: + if start > 0: + LOG.append((0, start - 1)) + next_start = self.file_size if i == len(ranges) - 1 else ranges[i + 1][0] + if end < next_start - 1: + LOG.append((end + 1, next_start - 1)) + return LOG + + def __increase_ranges_slice(self, ranges: list, minimum_size=1024 * 1024): + assert len(ranges) > 0 + block_size = [end - start + 1 for start, end in ranges] + index_of_max = block_size.index(max(block_size)) + start, end = ranges[index_of_max] + halfsize = block_size[index_of_max] // 2 + if halfsize >= minimum_size: + new_ranges = [x for i, x in enumerate(ranges) if i != index_of_max] + new_ranges.append((start, start + halfsize)) + new_ranges.append((start + halfsize + 1, end)) + else: + new_ranges = ranges + return new_ranges + + def __ask_for_work(self, worker_num: int): + """ask for work, return[work_range],update self.LOG""" + assert worker_num > 0 + task = [] + LOG_num = len(self.LOG) + # no work now, ask for new work + if LOG_num == 0: + self.__share_the_burdern() + return [] + # enough work, consume + if LOG_num >= worker_num: + for _ in range(worker_num): + task.append(self.LOG.pop(0)) + # too much work + else: + slice_num = worker_num - LOG_num + task = self.LOG + self.LOG = [] + for _ in range(slice_num): + task = self.__increase_ranges_slice(task) + task.sort(key=lambda x: x[0]) + return task + + def __share_the_burdern(self, minimum_size=1024 * 1024): + """Find the heavy worker, and introduce helper""" + max_size = 0 + max_size_name = "" + for w in self.workers: + p = w.get_progress() + size = p["end"] - p["curser"] + 1 + if size > max_size: + max_size = size + max_size_name = w.name + if max_size >= minimum_size: + for w in self.workers: + if w.name == max_size_name: + w.help() + break + + def __give_back_work(self, worker: Worker): + """Take 
unfinished work""" + progress = worker.get_progress() + curser = progress["curser"] + end = progress["end"] + if curser <= end: + self.LOG.append((curser, end)) + self.LOG.sort(key=lambda x: x[0]) + + def __give_me_a_worker(self, start, end): + worker = Worker(name=f"{self.filename}.{start}", + url=self.url, range_start=start, range_end=end, cache_dir=self.cache_dir, + finish_callback=self.__on_worker_finish, + ) + return worker + + def __whip(self, worker: Worker): + """assign new job""" + self.workers.append(worker) + self.workers.sort() + worker.start() + + def __on_worker_finish(self, worker: Worker): + assert worker.FINISH_TYPE != "" + self.workers.remove(worker) + # need helper + if worker.FINISH_TYPE == "HELP": + self.__give_back_work(worker) + self.workaholic(2) + # job done + elif worker.FINISH_TYPE == "DONE": + # get one more job + self.workaholic(1) + elif worker.FINISH_TYPE == "RETIRE": + self.__give_back_work(worker) + # Job Done, Sewing! + if self.workers == [] and self.__get_log_from_cache() == []: + self.__sew() + + def start(self): + # workers assembly + for start, end in self.__ask_for_work(self.blocks_num): + worker = self.__give_me_a_worker(start, end) + self.__whip(worker) + # wait till done + self.__main_thread_done.wait() + + def stop(self): + for w in self.workers: + w.retire() + while len(self.workers) != 0: + time.sleep(0.5) + self.LOG = self.__get_log_from_cache() + + def workaholic(self, n=1): + """ no work no life""" + for s, e in self.__ask_for_work(n): + worker = self.__give_me_a_worker(s, e) + self.__whip(worker) + + def restart(self): + self.stop() + # worker assembly again! 
+ for start, end in self.__ask_for_work(self.blocks_num): + worker = self.__give_me_a_worker(start, end) + self.__whip(worker) + + def __supervise(self): + """worker and download status supervisor""" + REFRESH_INTERVAL = 1 + # serve as a time window-length + LAG_COUNT = 5 + WAIT_TIMES_BEFORE_RESTART = 30 + SPEED_DEGRADATION_PERCENTAGE = 0.3 + self.__download_record = [] + maxspeed = 0 + wait_times = WAIT_TIMES_BEFORE_RESTART + while not self.__done.is_set(): + dwn_size = sum([os.path.getsize(cachefile) for cachefile in self.__get_cache_filenames()]) + self.__download_record.append({"timestamp": time.time(), "size": dwn_size}) + if len(self.__download_record) > LAG_COUNT: + self.__download_record.pop(0) + s = self.__download_record[-1]["size"] - self.__download_record[0]["size"] + t = self.__download_record[-1]["timestamp"] - self.__download_record[0]["timestamp"] + if not t == 0: + speed = s / t + readable_speed = self.__get_readable_size(speed) + # print(s,t,readable_speed) + percentage = self.__download_record[-1]["size"] / self.file_size * 100 + status_msg = f"\r[Download Info] {percentage:.2f} % | {readable_speed}/s | workers: {len(self.workers)} | {(time.time() - self.start_since):.0f}s" + self.__whistleblower(status_msg) + # speed monitor + maxspeed = max(maxspeed, speed) + EPSILON = 1e-5 + # tolerance reached + time_over = wait_times < 0 + # not finished yet + not_finished = not self.__done.is_set() + + # still running fast enough + speed_drops_significantly = (maxspeed - speed + EPSILON) / (maxspeed + EPSILON) > SPEED_DEGRADATION_PERCENTAGE + speed_under_threshold = speed < 1024 * 1024 + scene_1 = speed_drops_significantly and speed_under_threshold + # running slow + scene_2 = speed < 100 * 1024 + if time_over and not_finished and (scene_1 or scene_2): + self.__whistleblower("\r[info] speed degradation, restarting...") + self.restart() + maxspeed = 0 + wait_times = WAIT_TIMES_BEFORE_RESTART + else: + wait_times -= 1 + time.sleep(REFRESH_INTERVAL) + + def 
__sew(self): + self.__done.set() + chunk_size = 10 * 1024 * 1024 + with open(f"{os.path.join(self.download_dir, self.prefix, self.filename)}", "wb") as f: + for start, _ in self.__get_ranges_from_cache(): + cache_filename = f"{self.cache_dir}{self.filename}.{start}.odl" + with open(cache_filename, "rb") as cache_file: + data = cache_file.read(chunk_size) + while data: + f.write(data) + f.flush() + data = cache_file.read(chunk_size) + self.clear() + self.__whistleblower("\r") + self.__main_thread_done.set() + + def __whistleblower(self, saying: str): + sys.stdout.write(saying) + + def clear(self): + for filename in self.__get_cache_filenames(): + os.remove(filename) \ No newline at end of file From 72a48f9d7a9f2db0c85d41853483e5b013457bd4 Mon Sep 17 00:00:00 2001 From: weixingjian Date: Fri, 10 Mar 2023 17:26:54 +0800 Subject: [PATCH 09/29] remove redundancy --- opendatalab/client/downloader.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/opendatalab/client/downloader.py b/opendatalab/client/downloader.py index adf2498..7c5e056 100644 --- a/opendatalab/client/downloader.py +++ b/opendatalab/client/downloader.py @@ -76,9 +76,7 @@ def __init__(self, url: str, filename:str, download_dir: str, blocks_num: int = self.prefix = os.path.dirname(filename) self.url = url self.download_dir = download_dir - if os.path.exists(os.path.join(self.download_dir, self.prefix, self.filename)): - self.__whistleblower('target already exists') - self.stop() + # self.download_dir = os.path.join(download_dir, f".{os.sep}odl{os.sep}") self.blocks_num = blocks_num From 1478914c86812ab426aece40a542a5c8aa7e8cd5 Mon Sep 17 00:00:00 2001 From: weixingjian Date: Fri, 17 Mar 2023 18:05:03 +0800 Subject: [PATCH 10/29] revise donwload directory --- opendatalab/cli/cmd.py | 3 +- opendatalab/cli/get.py | 74 +++++++++++++++----------------- opendatalab/client/downloader.py | 12 ++++++ setup.cfg | 3 +- 4 files changed, 50 insertions(+), 42 deletions(-) diff --git 
a/opendatalab/cli/cmd.py b/opendatalab/cli/cmd.py index d7b92ea..5c93373 100644 --- a/opendatalab/cli/cmd.py +++ b/opendatalab/cli/cmd.py @@ -155,7 +155,8 @@ def get(obj: ContextInfo, name, dest, workers): Args: obj (ContextInfo): context info\f name (str): dataset name\f - destination(str): desired dataset store path\f + destination(str): desired dataset store path\f + wokers(str): number of workers\f """ from opendatalab.cli.get import implement_get diff --git a/opendatalab/cli/get.py b/opendatalab/cli/get.py index add0756..99243d0 100644 --- a/opendatalab/cli/get.py +++ b/opendatalab/cli/get.py @@ -39,15 +39,20 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) Returns: """ ds_split = name.split("/") + if ds_split[-1] == '': + ds_split.pop() + dataset_name = ds_split[0] + single_file_flag = False if len(ds_split) > 1: - dataset_name = ds_split[0] + # if a single file + if ('.' in ds_split[-1]): + if len(ds_split) == 2: + single_file_flag = True sub_dir = "/".join(ds_split[1:]) else: dataset_name = name sub_dir = "" - - # print(name, ds_split ,dataset_name, sub_dir) - + client = obj.get_client() data_info = client.get_api().get_info(dataset_name) info_dataset_name = data_info['name'] @@ -55,34 +60,23 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) dataset_res_dict = client.get_api().get_dataset_files(dataset_name=info_dataset_name, prefix = sub_dir) - # print(dataset_res_dict) + total_object = dataset_res_dict['total'] + # obj list constuct obj_info_list = [] for info in dataset_res_dict['list']: curr_dict = {} if not info['isDir']: curr_dict['size'] = info['size'] - curr_dict['name'] = os.path.join(sub_dir,info['path']) + if single_file_flag: + curr_dict['name'] = info['path'] + elif len(sub_dir.split('/')) > 1: + curr_dict['name'] = sub_dir + else: + curr_dict['name'] = os.path.join(sub_dir,info['path']) obj_info_list.append(curr_dict) - # if not sub_dir: - # print(obj_info_list, sub_dir) - 
download_urls_list = client.get_api().get_dataset_download_urls( - dataset_id=info_dataset_id, - dataset_list=obj_info_list) - # print(obj_info_list) - print('___________________________________________________') - - - url_list = [] - item_list = [] - for item in download_urls_list: - url_list.append(item['url']) - item_list.append(item['name']) - - # print(url_list[0], item_list[0]) - local_dir = destination download_data = client.get_api().get_download_record(info_dataset_name) @@ -104,26 +98,28 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) else: click.secho('See you next time~!') sys.exit(1) - - # print(url_list[0], item_list[0]) - ######################################################################## - with tqdm(total = len(url_list)) as pbar: - for idx in range(len(url_list)): - if len(item_list[idx].split('/')) == 1: - filename = item_list[idx] - prefix = '' - else: - filename = item_list[idx].split('/')[-1] - prefix = os.path.dirname(item_list[idx]) - click.echo(f"Downloading No.{idx+1} of total {len(url_list)} files\n") - if os.path.exists(os.path.join(destination,info_dataset_name, prefix,filename)): + with tqdm(total = total_object) as pbar: + for idx in range(total_object): + dataset_seg_list = [] + dataset_seg_list.append(obj_info_list[idx]) + download_urls_list = client.get_api().get_dataset_download_urls( + dataset_id=info_dataset_id, + dataset_list=dataset_seg_list) + url_download = download_urls_list[0]['url'] + filename = download_urls_list[0]['name'] + # print(url_download, filename) + click.echo(f"Downloading No.{idx+1} of total {total_object} files") + if os.path.exists((os.path.join(destination, info_dataset_name, filename))): + # print(os.path.join(destination, info_dataset_name, filename)) click.echo('target already exists, jumping to next!') + pbar.update(1) continue - downloader.Downloader(url = url_list[idx], - filename=item_list[idx], + downloader.Downloader(url = url_download, + filename= filename, 
download_dir = os.path.join(destination, info_dataset_name), blocks_num= num_workers).start() pbar.update(1) - click.echo(f"\nDownload Complete!") + + click.echo(f"\nDownload Complete!") \ No newline at end of file diff --git a/opendatalab/client/downloader.py b/opendatalab/client/downloader.py index 7c5e056..2599971 100644 --- a/opendatalab/client/downloader.py +++ b/opendatalab/client/downloader.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- import glob +import hashlib import os import sys import threading @@ -343,6 +344,17 @@ def __sew(self): def __whistleblower(self, saying: str): sys.stdout.write(saying) + def md5(self): + chunk_size = 1024 * 1024 + filename = f"{os.path.join(self.download_dir, self.prefix, self.filename)}" + md5 = hashlib.md5() + with open(filename, "rb") as f: + data = f.read(chunk_size) + while data: + md5.update(data) + data = f.read(chunk_size) + return md5.hexdigest() + def clear(self): for filename in self.__get_cache_filenames(): os.remove(filename) \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 872d9ab..d9fc56b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -22,6 +22,7 @@ classifiers = Programming Language :: Python Programming Language :: Python :: 3 Programming Language :: Python :: 3 :: Only + Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 @@ -35,8 +36,6 @@ install_requires = click >= 7.0.0 requests >= 2.4.2 tqdm >= 4.14.0 - oss2 - parfive colorama rich pywin32; platform_system == "Windows" From 7f4f9f5630a1128da7806833a4c0c4cfd78974ff Mon Sep 17 00:00:00 2001 From: weixingjian Date: Fri, 17 Mar 2023 18:44:35 +0800 Subject: [PATCH 11/29] fix boader issue and update user panel --- opendatalab/client/downloader.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/opendatalab/client/downloader.py b/opendatalab/client/downloader.py index 2599971..29e1b8a 100644 --- 
a/opendatalab/client/downloader.py +++ b/opendatalab/client/downloader.py @@ -82,7 +82,8 @@ def __init__(self, url: str, filename:str, download_dir: str, blocks_num: int = # self.download_dir = os.path.join(download_dir, f".{os.sep}odl{os.sep}") self.blocks_num = blocks_num self.file_size = self.__get_size() - + if self.file_size <= 1: + return # make download dir if not os.path.exists(self.download_dir): os.makedirs(self.download_dir) @@ -300,7 +301,7 @@ def __supervise(self): readable_speed = self.__get_readable_size(speed) # print(s,t,readable_speed) percentage = self.__download_record[-1]["size"] / self.file_size * 100 - status_msg = f"\r[Download Info] {percentage:.2f} % | {readable_speed}/s | workers: {len(self.workers)} | {(time.time() - self.start_since):.0f}s" + status_msg = f"\r[Current File Download Info] File Progress: {percentage:.2f} % | Speed: {readable_speed}/s | Number of Workers: {len(self.workers)} | Time Elapsed: {(time.time() - self.start_since):.0f}s | ETA: {((self.file_size- dwn_size)/speed):.2f}s" self.__whistleblower(status_msg) # speed monitor maxspeed = max(maxspeed, speed) From fb3af22614ac5096049cf1a2df3fd0edc89f29de Mon Sep 17 00:00:00 2001 From: weixingjian Date: Mon, 20 Mar 2023 11:13:11 +0800 Subject: [PATCH 12/29] update version --- opendatalab/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opendatalab/__version__.py b/opendatalab/__version__.py index f38caa2..4cf1e62 100644 --- a/opendatalab/__version__.py +++ b/opendatalab/__version__.py @@ -8,7 +8,7 @@ """OpenDataLab python SDK version info.""" __url__ = 'https://opendatalab-ut.shlab.tech' -__version__ = "0.0.4" +__version__ = "0.0.5" __svc__ = '2.0' #odl_clientId = "kmz3bkwzlaa3wrq8pvwa" # odl_clientId = "qja9jy5wnjyqwvylmeqw" From 18f57ffcb429e13663019a8a2e0210d941faf9dd Mon Sep 17 00:00:00 2001 From: weixingjian Date: Mon, 20 Mar 2023 17:36:05 +0800 Subject: [PATCH 13/29] fix dependency problem --- opendatalab/__version__.py | 2 +- 
opendatalab/client/api.py | 2 -- opendatalab/client/downloader.py | 4 ++-- setup.cfg | 5 +++-- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/opendatalab/__version__.py b/opendatalab/__version__.py index 4cf1e62..6afe2c3 100644 --- a/opendatalab/__version__.py +++ b/opendatalab/__version__.py @@ -8,7 +8,7 @@ """OpenDataLab python SDK version info.""" __url__ = 'https://opendatalab-ut.shlab.tech' -__version__ = "0.0.5" +__version__ = "0.0.6" __svc__ = '2.0' #odl_clientId = "kmz3bkwzlaa3wrq8pvwa" # odl_clientId = "qja9jy5wnjyqwvylmeqw" diff --git a/opendatalab/client/api.py b/opendatalab/client/api.py index 2d94493..8b4accd 100644 --- a/opendatalab/client/api.py +++ b/opendatalab/client/api.py @@ -255,7 +255,6 @@ def submit_download_record(self, dataset, download_data): def odl_auth(self, account, password): code = get_odl_token(account, password) - print(code) data = { "code": code, "redirect": "", @@ -267,7 +266,6 @@ def odl_auth(self, account, password): data=data, headers={"Content-Type": "application/json"}, ) - print(resp.status_code) if resp.status_code != 200: raise OdlAuthError(resp.status_code, resp.text) diff --git a/opendatalab/client/downloader.py b/opendatalab/client/downloader.py index 29e1b8a..ae7248d 100644 --- a/opendatalab/client/downloader.py +++ b/opendatalab/client/downloader.py @@ -297,15 +297,15 @@ def __supervise(self): s = self.__download_record[-1]["size"] - self.__download_record[0]["size"] t = self.__download_record[-1]["timestamp"] - self.__download_record[0]["timestamp"] if not t == 0: + EPSILON = 1e-5 speed = s / t readable_speed = self.__get_readable_size(speed) # print(s,t,readable_speed) percentage = self.__download_record[-1]["size"] / self.file_size * 100 - status_msg = f"\r[Current File Download Info] File Progress: {percentage:.2f} % | Speed: {readable_speed}/s | Number of Workers: {len(self.workers)} | Time Elapsed: {(time.time() - self.start_since):.0f}s | ETA: {((self.file_size- dwn_size)/speed):.2f}s" + 
status_msg = f"\r[Current File Download Info] File Progress: {percentage:.2f} % | Speed: {readable_speed}/s | Number of Workers: {len(self.workers)} | Time Elapsed: {(time.time() - self.start_since):.0f}s | ETA: {((self.file_size- dwn_size)/(speed+EPSILON)):.2f}s" self.__whistleblower(status_msg) # speed monitor maxspeed = max(maxspeed, speed) - EPSILON = 1e-5 # tolerance reached time_over = wait_times < 0 # not finished yet diff --git a/setup.cfg b/setup.cfg index d9fc56b..00a13e5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,11 +31,12 @@ classifiers = [options] packages = find: -python_requires = >=3.8 +python_requires = >=3.7 install_requires = + pycryptodome click >= 7.0.0 requests >= 2.4.2 - tqdm >= 4.14.0 + tqdm colorama rich pywin32; platform_system == "Windows" From cadf1f03a903e0ddd20aadfdb51c312cf272cb57 Mon Sep 17 00:00:00 2001 From: weixingjian Date: Wed, 22 Mar 2023 13:54:26 +0800 Subject: [PATCH 14/29] fix cmd ls -h display layout --- opendatalab/cli/cmd.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/opendatalab/cli/cmd.py b/opendatalab/cli/cmd.py index 5c93373..3984cbe 100644 --- a/opendatalab/cli/cmd.py +++ b/opendatalab/cli/cmd.py @@ -91,8 +91,7 @@ def login(obj: ContextInfo, username: str, password: str): implement_login(obj, username, password) -@command(synopsis=( - "$ odl ls dataset # list dataset files")) +@command(synopsis=("$ odl ls dataset # list dataset files",)) @click.argument("name", nargs=1) @click.pass_obj def ls(obj: ContextInfo, name: str) -> None: @@ -120,7 +119,7 @@ def search(obj: ContextInfo, keywords): implement_search(obj, keywords) -@command(synopsis=("$ odl info dataset_name # show dataset info.",)) +@command(synopsis=("$ odl info dataset_name # show dataset info",)) @click.argument("name", nargs=1) @click.pass_obj def info(obj: ContextInfo, name): From f27819ad8a61ca24462e361a63490f9e2b2fd371 Mon Sep 17 00:00:00 2001 From: weixingjian Date: Tue, 4 Apr 2023 17:54:52 +0800 Subject: [PATCH 15/29] add 
risk level --- opendatalab/cli/get.py | 9 ++++++ opendatalab/client/downloader.py | 52 +++++++++++++++++--------------- 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/opendatalab/cli/get.py b/opendatalab/cli/get.py index 99243d0..1055280 100644 --- a/opendatalab/cli/get.py +++ b/opendatalab/cli/get.py @@ -55,6 +55,15 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) client = obj.get_client() data_info = client.get_api().get_info(dataset_name) + + # get risk level + info_dataset_risk = data_info['attrs']['riskLevel'] + info_dataset_url = data_info['attrs']['publishUrl'] + if info_dataset_risk > 3: + click.echo(f"Direct download for {dataset_name} is currently not available." + f"\nPlease visit the homepage {info_dataset_url} for more information.") + sys.exit(1) + info_dataset_name = data_info['name'] info_dataset_id = data_info['id'] diff --git a/opendatalab/client/downloader.py b/opendatalab/client/downloader.py index ae7248d..f802d6b 100644 --- a/opendatalab/client/downloader.py +++ b/opendatalab/client/downloader.py @@ -81,42 +81,46 @@ def __init__(self, url: str, filename:str, download_dir: str, blocks_num: int = # self.download_dir = os.path.join(download_dir, f".{os.sep}odl{os.sep}") self.blocks_num = blocks_num + self.__bad_url_flag = False self.file_size = self.__get_size() if self.file_size <= 1: return + if not self.__bad_url_flag: # make download dir - if not os.path.exists(self.download_dir): - os.makedirs(self.download_dir) - - # make cache dir - if self.prefix_flag: - self.cache_dir = os.path.join(self.download_dir,self.prefix,'.cache/') - else: - self.cache_dir = os.path.join(self.download_dir,'.cache/') - if not os.path.exists(self.cache_dir): - os.makedirs(self.cache_dir) - - # slicing - self.start_since = time.time() - # worker container - self.workers = [] - self.LOG = self.__get_log_from_cache() - self.__done = threading.Event() - self.__download_record = [] - 
threading.Thread(target=self.__supervise).start() - # main - self.__main_thread_done = threading.Event() - # - readable_size = self.__get_readable_size(self.file_size) - pathfilename = os.path.join(self.download_dir, self.prefix,self.filename) + if not os.path.exists(self.download_dir): + os.makedirs(self.download_dir) + + # make cache dir + if self.prefix_flag: + self.cache_dir = os.path.join(self.download_dir,self.prefix,'.cache/') + else: + self.cache_dir = os.path.join(self.download_dir,'.cache/') + if not os.path.exists(self.cache_dir): + os.makedirs(self.cache_dir) + # print(self.url, self.file_size) + # slicing + self.start_since = time.time() + # worker container + self.workers = [] + self.LOG = self.__get_log_from_cache() + self.__done = threading.Event() + self.__download_record = [] + threading.Thread(target=self.__supervise).start() + # main + self.__main_thread_done = threading.Event() + # + readable_size = self.__get_readable_size(self.file_size) + pathfilename = os.path.join(self.download_dir, self.prefix,self.filename) def __get_size(self): try: req = requests.head(self.url) content_length = req.headers["Content-Length"] req.close() + # print(req.headers) return int(content_length) except Exception as err: + self.__bad_url_flag = True self.__whistleblower(f"[Error] {err}") return 0 From 38ff605a069faf46b6fcd4dfd37cea134eeb2db9 Mon Sep 17 00:00:00 2001 From: weixingjian Date: Thu, 6 Apr 2023 16:24:09 +0800 Subject: [PATCH 16/29] add authorization before downloading --- opendatalab/cli/get.py | 21 +++++++++++++++++++-- opendatalab/cli/upgrade.py | 3 ++- opendatalab/client/api.py | 24 ++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/opendatalab/cli/get.py b/opendatalab/cli/get.py index 1055280..8165157 100644 --- a/opendatalab/cli/get.py +++ b/opendatalab/cli/get.py @@ -15,6 +15,13 @@ from opendatalab.client import downloader from opendatalab.exception import OdlDataNotExistsError +STATUS_DICT = { + # 
"noAuthRequired": "No Authorization needed", + "pendingRequirement": "In order to download this dataset, please fill in an application form via our website.", + "waiting": "Authorization submitted, please wait for the application result", + "rejected": "Authorization submitted, but rejected. Please contact us for more information", + # "accepted": "Authorization submitted, download available." +} def handler(dwCtrlType): if dwCtrlType == 0: # CTRL_C_EVENT @@ -38,6 +45,7 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) compressed (bool): Returns: """ + # process dataset_name and split ds_split = name.split("/") if ds_split[-1] == '': ds_split.pop() @@ -53,9 +61,20 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) dataset_name = name sub_dir = "" + # client init client = obj.get_client() data_info = client.get_api().get_info(dataset_name) + # basic info of dataset + info_dataset_name = data_info['name'] + info_dataset_id = data_info['id'] + + # check the download authorization status. + auth_status = client.get_api().get_auth_status(dataset_id=info_dataset_id) + if auth_status['state'] in STATUS_DICT.keys(): + click.echo(f"{STATUS_DICT[auth_status['state']]}") + sys.exit(1) + # get risk level info_dataset_risk = data_info['attrs']['riskLevel'] info_dataset_url = data_info['attrs']['publishUrl'] @@ -64,8 +83,6 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) f"\nPlease visit the homepage {info_dataset_url} for more information.") sys.exit(1) - info_dataset_name = data_info['name'] - info_dataset_id = data_info['id'] dataset_res_dict = client.get_api().get_dataset_files(dataset_name=info_dataset_name, prefix = sub_dir) diff --git a/opendatalab/cli/upgrade.py b/opendatalab/cli/upgrade.py index d23eb58..49412ed 100644 --- a/opendatalab/cli/upgrade.py +++ b/opendatalab/cli/upgrade.py @@ -1,8 +1,9 @@ # # Copyright 2022 Shanghai AI Lab. Licensed under MIT License. 
# -import sys import operator +import sys + import click from opendatalab.__version__ import __version__ diff --git a/opendatalab/client/api.py b/opendatalab/client/api.py index 8b4accd..97c8ba3 100644 --- a/opendatalab/client/api.py +++ b/opendatalab/client/api.py @@ -91,6 +91,30 @@ def get_dataset_download_urls(self, dataset_id:int, dataset_list:list): sys.exit(-1) return download_url_list + + def get_auth_status(self, dataset_id:int): + """Get Dataset authentication status. + + + Args: + dataset_id (int): dataset id + """ + resp = requests.get( + f"{self.host}/api/datasets/{dataset_id}/downloadAuth", + headers= { + "X-OPENDATALAB-API-TOKEN": self.token, + "Cookie": f"opendatalab_session={self.odl_cookie}", + "User-Agent": UUID, + "accept" : "application/json" + } + ) + if resp.status_code != 200: + print(f"{OpenDataLabError(resp.status_code, resp.text)}") + + result_status = resp.json()['data'] + + return result_status + def get_dataset_sts(self, dataset, expires=900): """Get dataset sts by dataset_name From 2e78d57adfea173d7306d0695bc171dc5d809914 Mon Sep 17 00:00:00 2001 From: weixingjian Date: Tue, 18 Apr 2023 14:18:52 +0800 Subject: [PATCH 17/29] update readme and uncomment some prints --- README.md | 2 +- opendatalab/client/api.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f09ad82..dd25e9d 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ Commands: ### Version ```cmd $ odl version -odl version, current: 0.0.2, svc: 1.8 +odl version, current: 0.0.6, svc: 1.8 ``` ### Login diff --git a/opendatalab/client/api.py b/opendatalab/client/api.py index 97c8ba3..387c1f3 100644 --- a/opendatalab/client/api.py +++ b/opendatalab/client/api.py @@ -95,7 +95,6 @@ def get_dataset_download_urls(self, dataset_id:int, dataset_list:list): def get_auth_status(self, dataset_id:int): """Get Dataset authentication status. 
- Args: dataset_id (int): dataset id """ @@ -109,7 +108,7 @@ def get_auth_status(self, dataset_id:int): } ) if resp.status_code != 200: - print(f"{OpenDataLabError(resp.status_code, resp.text)}") + click.echo(f"{OpenDataLabError(resp.status_code, resp.text)}") result_status = resp.json()['data'] @@ -191,7 +190,6 @@ def get_similar_dataset(self, dataset): }, ) if resp.status_code != 200: - # print(f"{(resp.status_code, resp.text)}") sys.exit(-1) data = resp.json()['data'] From 841644a0fe4dc83fa3da4f48a0ace1756c815e0d Mon Sep 17 00:00:00 2001 From: weixingjian Date: Thu, 20 Apr 2023 19:40:45 +0800 Subject: [PATCH 18/29] fix malfunction under introduce of dsdl path --- opendatalab/cli/get.py | 25 +++++++++++++------------ opendatalab/cli/search.py | 6 +++--- opendatalab/client/api.py | 2 ++ opendatalab/client/downloader.py | 12 +++++++++--- 4 files changed, 27 insertions(+), 18 deletions(-) diff --git a/opendatalab/cli/get.py b/opendatalab/cli/get.py index 8165157..7196dd1 100644 --- a/opendatalab/cli/get.py +++ b/opendatalab/cli/get.py @@ -46,20 +46,18 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) Returns: """ # process dataset_name and split + # print(name) ds_split = name.split("/") + dataset_name = ds_split[0] if ds_split[-1] == '': ds_split.pop() - dataset_name = ds_split[0] single_file_flag = False - if len(ds_split) > 1: + sub_dir = "/".join(ds_split[1:]) + if len(ds_split) > 2: # if a single file if ('.' 
in ds_split[-1]): - if len(ds_split) == 2: - single_file_flag = True - sub_dir = "/".join(ds_split[1:]) - else: - dataset_name = name - sub_dir = "" + single_file_flag = True + sub_dir = "/".join(ds_split[1:-1]) # client init client = obj.get_client() @@ -76,7 +74,7 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) sys.exit(1) # get risk level - info_dataset_risk = data_info['attrs']['riskLevel'] + info_dataset_risk = data_info['attrs'].get('riskLevel', 0) info_dataset_url = data_info['attrs']['publishUrl'] if info_dataset_risk > 3: click.echo(f"Direct download for {dataset_name} is currently not available." @@ -86,7 +84,7 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) dataset_res_dict = client.get_api().get_dataset_files(dataset_name=info_dataset_name, prefix = sub_dir) - + print(dataset_res_dict, sub_dir, single_file_flag) total_object = dataset_res_dict['total'] # obj list constuct @@ -100,9 +98,9 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) elif len(sub_dir.split('/')) > 1: curr_dict['name'] = sub_dir else: - curr_dict['name'] = os.path.join(sub_dir,info['path']) + curr_dict['name'] = info['path'] obj_info_list.append(curr_dict) - + print(obj_info_list) local_dir = destination download_data = client.get_api().get_download_record(info_dataset_name) @@ -133,6 +131,7 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) download_urls_list = client.get_api().get_dataset_download_urls( dataset_id=info_dataset_id, dataset_list=dataset_seg_list) + # print(download_urls_list) url_download = download_urls_list[0]['url'] filename = download_urls_list[0]['name'] # print(url_download, filename) @@ -142,6 +141,8 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) click.echo('target already exists, jumping to next!') pbar.update(1) continue + + print(url_download,filename) downloader.Downloader(url = 
url_download, filename= filename, download_dir = os.path.join(destination, info_dataset_name), diff --git a/opendatalab/cli/search.py b/opendatalab/cli/search.py index 1dbfed2..b9ed42b 100644 --- a/opendatalab/cli/search.py +++ b/opendatalab/cli/search.py @@ -55,7 +55,7 @@ def implement_search(obj: ContextInfo, keywords: str) -> None: time_start =time.time() result_list = odl_api.search_dataset(keywords) time_end = time.time() - print('-------------time_consuming--------', time_end - time_start, 's') + # print('-------------time_consuming--------', time_end - time_start, 's') console = Console() table = Table(show_header=True, header_style='bold cyan', box=box.ASCII2) table.add_column("Name", min_width=10, justify='left', overflow='fold') @@ -77,8 +77,8 @@ def implement_search(obj: ContextInfo, keywords: str) -> None: ds_desc_rich = rich_content_str(keywords=keywords, content=ds_desc) ds_attr_info = res['attrs'] - ds_file_byte = bytes2human(ds_attr_info['fileBytes']) - ds_file_count = ds_attr_info['fileCount'] + ds_file_byte = bytes2human(ds_attr_info.get('fileBytes', 0)) + ds_file_count = ds_attr_info.get('fileCount',0) ds_data_types = _get_complex_types_str(ds_attr_info, 'mediaTypes') ds_task_types = _get_complex_types_str(ds_attr_info, 'taskTypes') diff --git a/opendatalab/client/api.py b/opendatalab/client/api.py index 387c1f3..853adaf 100644 --- a/opendatalab/client/api.py +++ b/opendatalab/client/api.py @@ -109,6 +109,7 @@ def get_auth_status(self, dataset_id:int): ) if resp.status_code != 200: click.echo(f"{OpenDataLabError(resp.status_code, resp.text)}") + sys.exit(-1) result_status = resp.json()['data'] @@ -168,6 +169,7 @@ def search_dataset(self, keywords): "state": ["online"], }) ) + print(resp.status_code, resp.url) if resp.status_code != 200: print(f"{OpenDataLabError(resp.status_code, resp.text)}") sys.exit(-1) diff --git a/opendatalab/client/downloader.py b/opendatalab/client/downloader.py index f802d6b..b7ee768 100644 --- 
a/opendatalab/client/downloader.py +++ b/opendatalab/client/downloader.py @@ -114,10 +114,16 @@ def __init__(self, url: str, filename:str, download_dir: str, blocks_num: int = def __get_size(self): try: - req = requests.head(self.url) - content_length = req.headers["Content-Length"] - req.close() + # print(self.url) + # req = requests.head(self.url) # print(req.headers) + # content_length = req.headers["Content-Length"] + resp = requests.get(self.url,stream=True) + content_length = resp.headers["Content-Length"] + # print(f"-------------{content_length}--------------") + resp.close() + # print(req.headers) + # print(req.headers["Content-Length"]) return int(content_length) except Exception as err: self.__bad_url_flag = True From 588e3bd509f268a00f077e50ab3f9331e3f6563a Mon Sep 17 00:00:00 2001 From: weixingjian Date: Tue, 25 Apr 2023 10:46:26 +0800 Subject: [PATCH 19/29] not showing detailed download info --- opendatalab/cli/get.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/opendatalab/cli/get.py b/opendatalab/cli/get.py index 7196dd1..1aa5c75 100644 --- a/opendatalab/cli/get.py +++ b/opendatalab/cli/get.py @@ -84,7 +84,6 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) dataset_res_dict = client.get_api().get_dataset_files(dataset_name=info_dataset_name, prefix = sub_dir) - print(dataset_res_dict, sub_dir, single_file_flag) total_object = dataset_res_dict['total'] # obj list constuct @@ -100,7 +99,7 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) else: curr_dict['name'] = info['path'] obj_info_list.append(curr_dict) - print(obj_info_list) + local_dir = destination download_data = client.get_api().get_download_record(info_dataset_name) @@ -142,7 +141,6 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) pbar.update(1) continue - print(url_download,filename) downloader.Downloader(url = url_download, filename= filename, download_dir = 
os.path.join(destination, info_dataset_name), From ffe5d1049bd03b1aed69857a33ba1ea076d8e92f Mon Sep 17 00:00:00 2001 From: weixingjian Date: Fri, 28 Apr 2023 15:50:33 +0800 Subject: [PATCH 20/29] for product --- opendatalab/__version__.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/opendatalab/__version__.py b/opendatalab/__version__.py index 6afe2c3..ccf1ac9 100644 --- a/opendatalab/__version__.py +++ b/opendatalab/__version__.py @@ -7,13 +7,9 @@ """OpenDataLab python SDK version info.""" -__url__ = 'https://opendatalab-ut.shlab.tech' +__url__ = "https://opendatalab.org.cn" __version__ = "0.0.6" __svc__ = '2.0' -#odl_clientId = "kmz3bkwzlaa3wrq8pvwa" -# odl_clientId = "qja9jy5wnjyqwvylmeqw" -#uaa_url_prefix = "https://sso.openxlab.org.cn/gw/uaa-be" -uaa_url_prefix = "https://sso.staging.openxlab.org.cn/gw/uaa-be" -# baseUrl: https://sso.staging.openxlab.org.cn/gw/uaa-be -odl_clientId= 'ypkl8bwo0eb5ao1b96no' +odl_clientId = "kmz3bkwzlaa3wrq8pvwa" +uaa_url_prefix = "https://sso.openxlab.org.cn/gw/uaa-be" # clientSecret: 97gdrvwwzob86q2rneq2x95w6bnxkpqj5oak1ype \ No newline at end of file From 8ce87173e9f8500ea7a2f7b29dad0d913d143c86 Mon Sep 17 00:00:00 2001 From: weixingjian Date: Fri, 28 Apr 2023 16:09:52 +0800 Subject: [PATCH 21/29] publish as 0.0.7 --- opendatalab/__version__.py | 2 +- opendatalab/client/api.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/opendatalab/__version__.py b/opendatalab/__version__.py index ccf1ac9..7cbdacb 100644 --- a/opendatalab/__version__.py +++ b/opendatalab/__version__.py @@ -8,7 +8,7 @@ """OpenDataLab python SDK version info.""" __url__ = "https://opendatalab.org.cn" -__version__ = "0.0.6" +__version__ = "0.0.7" __svc__ = '2.0' odl_clientId = "kmz3bkwzlaa3wrq8pvwa" uaa_url_prefix = "https://sso.openxlab.org.cn/gw/uaa-be" diff --git a/opendatalab/client/api.py b/opendatalab/client/api.py index 853adaf..ac968d7 100644 --- a/opendatalab/client/api.py +++ 
b/opendatalab/client/api.py @@ -169,7 +169,7 @@ def search_dataset(self, keywords): "state": ["online"], }) ) - print(resp.status_code, resp.url) + # print(resp.status_code, resp.url) if resp.status_code != 200: print(f"{OpenDataLabError(resp.status_code, resp.text)}") sys.exit(-1) From a1c29e394c1dba4e0e8d527cb8c360db54b8a413 Mon Sep 17 00:00:00 2001 From: weixingjian Date: Thu, 11 May 2023 14:56:15 +0800 Subject: [PATCH 22/29] fix single file download error --- opendatalab/cli/get.py | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/opendatalab/cli/get.py b/opendatalab/cli/get.py index 1aa5c75..0b249b4 100644 --- a/opendatalab/cli/get.py +++ b/opendatalab/cli/get.py @@ -58,6 +58,7 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) if ('.' in ds_split[-1]): single_file_flag = True sub_dir = "/".join(ds_split[1:-1]) + file_name = sub_dir + '/' + ds_split[-1] # client init client = obj.get_client() @@ -84,21 +85,31 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) dataset_res_dict = client.get_api().get_dataset_files(dataset_name=info_dataset_name, prefix = sub_dir) - total_object = dataset_res_dict['total'] - - # obj list constuct - obj_info_list = [] - for info in dataset_res_dict['list']: - curr_dict = {} - if not info['isDir']: - curr_dict['size'] = info['size'] - if single_file_flag: - curr_dict['name'] = info['path'] - elif len(sub_dir.split('/')) > 1: - curr_dict['name'] = sub_dir - else: + # print(dataset_res_dict) + if not single_file_flag: + total_object = dataset_res_dict['total'] + # obj list constuct + obj_info_list = [] + for info in dataset_res_dict['list']: + curr_dict = {} + if not info['isDir']: + curr_dict['size'] = info['size'] + if single_file_flag: + curr_dict['name'] = info['path'] + elif len(sub_dir.split('/')) > 1: + curr_dict['name'] = sub_dir + else: + curr_dict['name'] = info['path'] + 
obj_info_list.append(curr_dict) + else: + total_object = 1 + obj_info_list = [] + for info in dataset_res_dict['list']: + curr_dict = {} + if info['path'] == str(file_name): + curr_dict['size'] = info['size'] curr_dict['name'] = info['path'] - obj_info_list.append(curr_dict) + obj_info_list.append(curr_dict) local_dir = destination From b1cf662f7ce3e1ffc7165c9923a3a78f1b67d043 Mon Sep 17 00:00:00 2001 From: weixingjian Date: Thu, 11 May 2023 15:18:14 +0800 Subject: [PATCH 23/29] fix some display error --- opendatalab/cli/get.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opendatalab/cli/get.py b/opendatalab/cli/get.py index 0b249b4..b3320f7 100644 --- a/opendatalab/cli/get.py +++ b/opendatalab/cli/get.py @@ -59,7 +59,6 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) single_file_flag = True sub_dir = "/".join(ds_split[1:-1]) file_name = sub_dir + '/' + ds_split[-1] - # client init client = obj.get_client() data_info = client.get_api().get_info(dataset_name) @@ -97,7 +96,8 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) if single_file_flag: curr_dict['name'] = info['path'] elif len(sub_dir.split('/')) > 1: - curr_dict['name'] = sub_dir + # curr_dict['name'] = sub_dir + curr_dict['name'] = info['path'] else: curr_dict['name'] = info['path'] obj_info_list.append(curr_dict) From fd375c4550ffe48095fbb721e543eec36054730d Mon Sep 17 00:00:00 2001 From: weixingjian Date: Thu, 11 May 2023 15:21:44 +0800 Subject: [PATCH 24/29] move version to 0.0.8 --- opendatalab/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opendatalab/__version__.py b/opendatalab/__version__.py index 7cbdacb..3c548a4 100644 --- a/opendatalab/__version__.py +++ b/opendatalab/__version__.py @@ -8,7 +8,7 @@ """OpenDataLab python SDK version info.""" __url__ = "https://opendatalab.org.cn" -__version__ = "0.0.7" +__version__ = "0.0.8" __svc__ = '2.0' odl_clientId = 
"kmz3bkwzlaa3wrq8pvwa" uaa_url_prefix = "https://sso.openxlab.org.cn/gw/uaa-be" From a2dd75160f9abeca2c7677cb8a7ee3c2163749ea Mon Sep 17 00:00:00 2001 From: weixingjian Date: Fri, 12 May 2023 17:40:25 +0800 Subject: [PATCH 25/29] fix README.md download error --- opendatalab/cli/get.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/opendatalab/cli/get.py b/opendatalab/cli/get.py index b3320f7..8349896 100644 --- a/opendatalab/cli/get.py +++ b/opendatalab/cli/get.py @@ -53,13 +53,17 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) ds_split.pop() single_file_flag = False sub_dir = "/".join(ds_split[1:]) - if len(ds_split) > 2: - # if a single file - if ('.' in ds_split[-1]): - single_file_flag = True + + if ('.' in ds_split[-1]): + single_file_flag = True + if len(ds_split) == 2: + # indicate README.md + file_name = ds_split[-1] + sub_dir = '' + elif len(ds_split) > 2: sub_dir = "/".join(ds_split[1:-1]) file_name = sub_dir + '/' + ds_split[-1] - # client init + # client init client = obj.get_client() data_info = client.get_api().get_info(dataset_name) @@ -84,7 +88,6 @@ def implement_get(obj: ContextInfo, name: str, destination:str, num_workers:int) dataset_res_dict = client.get_api().get_dataset_files(dataset_name=info_dataset_name, prefix = sub_dir) - # print(dataset_res_dict) if not single_file_flag: total_object = dataset_res_dict['total'] # obj list constuct From e361cf933966ca473b9e1c9882495819df1f5c87 Mon Sep 17 00:00:00 2001 From: weixingjian Date: Thu, 18 May 2023 15:47:03 +0800 Subject: [PATCH 26/29] fix several datasets download error --- opendatalab/cli/get.py | 2 +- opendatalab/client/downloader.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/opendatalab/cli/get.py b/opendatalab/cli/get.py index 8349896..c273026 100644 --- a/opendatalab/cli/get.py +++ b/opendatalab/cli/get.py @@ -54,7 +54,7 @@ def implement_get(obj: ContextInfo, name: str, destination:str, 
num_workers:int) single_file_flag = False sub_dir = "/".join(ds_split[1:]) - if ('.' in ds_split[-1]): + if len(ds_split) >= 2 and ('.' in ds_split[-1]): single_file_flag = True if len(ds_split) == 2: # indicate README.md diff --git a/opendatalab/client/downloader.py b/opendatalab/client/downloader.py index b7ee768..6370597 100644 --- a/opendatalab/client/downloader.py +++ b/opendatalab/client/downloader.py @@ -291,7 +291,7 @@ def restart(self): def __supervise(self): """worker and download status supervisor""" - REFRESH_INTERVAL = 1 + REFRESH_INTERVAL = 2 # serve as a time window-length LAG_COUNT = 5 WAIT_TIMES_BEFORE_RESTART = 30 From ab9285ae294372bf2d92d0a73bb4bb8ded62ecbd Mon Sep 17 00:00:00 2001 From: weixingjian Date: Thu, 18 May 2023 16:42:29 +0800 Subject: [PATCH 27/29] move version to 0.0.9 --- opendatalab/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opendatalab/__version__.py b/opendatalab/__version__.py index 3c548a4..75b1af8 100644 --- a/opendatalab/__version__.py +++ b/opendatalab/__version__.py @@ -8,7 +8,7 @@ """OpenDataLab python SDK version info.""" __url__ = "https://opendatalab.org.cn" -__version__ = "0.0.8" +__version__ = "0.0.9" __svc__ = '2.0' odl_clientId = "kmz3bkwzlaa3wrq8pvwa" uaa_url_prefix = "https://sso.openxlab.org.cn/gw/uaa-be" From ac868b1b44304329dff826a66561f565b2d66909 Mon Sep 17 00:00:00 2001 From: weixingjian Date: Tue, 1 Aug 2023 14:53:26 +0800 Subject: [PATCH 28/29] add: deprecation warning msg, add openxlab into dependency --- opendatalab/cli/utility.py | 3 +++ setup.cfg | 1 + 2 files changed, 4 insertions(+) diff --git a/opendatalab/cli/utility.py b/opendatalab/cli/utility.py index 737b911..d671766 100644 --- a/opendatalab/cli/utility.py +++ b/opendatalab/cli/utility.py @@ -10,6 +10,7 @@ from typing import Any, Callable, TypeVar import click +from rich import print as rprint from opendatalab.__version__ import __version__ from opendatalab.cli.config import config as client_config @@ -35,6 
+36,8 @@ def __init__(self, url: str, token: str): self.check_ret = 0 self.install_version = __version__ self.latest_version = None + self.warning = "[red]WARNING[/red]:This CLI tool is deprecated and will be removed in a future release.\nThe [bold yellow]opendatalab(odl)[/bold yellow] pkg has been deprecated and will no longer be supported in few weeks.\nWe recommend that you switch to the [bold yellow]openxlab[/bold yellow] pkg, which accept same username/password,\nprovides the same functionality and other enhanced AI friendly features.\nMore details please refer to [blue]https://openxlab.org.cn/datasets[/blue]\n" + rprint(self.warning) def get_client(self) -> Client: return Client(self.url, self.token, self.cookie) diff --git a/setup.cfg b/setup.cfg index 00a13e5..25c840d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -39,6 +39,7 @@ install_requires = tqdm colorama rich + openxlab pywin32; platform_system == "Windows" [options.packages.find] From a2ecca00d2718deb0b330e08ca7e1ed796c574ce Mon Sep 17 00:00:00 2001 From: WayneWei Date: Tue, 1 Aug 2023 15:03:46 +0800 Subject: [PATCH 29/29] Update __version__.py --- opendatalab/__version__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opendatalab/__version__.py b/opendatalab/__version__.py index 75b1af8..a24df51 100644 --- a/opendatalab/__version__.py +++ b/opendatalab/__version__.py @@ -8,8 +8,8 @@ """OpenDataLab python SDK version info.""" __url__ = "https://opendatalab.org.cn" -__version__ = "0.0.9" +__version__ = "0.0.10" __svc__ = '2.0' odl_clientId = "kmz3bkwzlaa3wrq8pvwa" uaa_url_prefix = "https://sso.openxlab.org.cn/gw/uaa-be" -# clientSecret: 97gdrvwwzob86q2rneq2x95w6bnxkpqj5oak1ype \ No newline at end of file +# clientSecret: 97gdrvwwzob86q2rneq2x95w6bnxkpqj5oak1ype