Skip to content

Commit 99facc2

Browse files
authored
Merge pull request #56 from fzls/修复下载文件时Accept-Ranges为none时会无限重复请求该文件的问题_
fixed #58
2 parents d532fb6 + a061959 commit 99facc2

File tree

3 files changed

+74
-40
lines changed

3 files changed

+74
-40
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@
3434

3535
# 更新日志
3636

37+
## `v2.6.4`
38+
39+
- 修复无法获取分享文件夹信息的问题[#58](https://github.com/zaxtyson/LanZouCloud-API/pull/58)
40+
3741
## `v2.6.3`
3842

3943
- 修复下载页的 Cookie 验证问题[#55](https://github.com/zaxtyson/LanZouCloud-API/pull/55)

lanzou/api/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from lanzou.api.core import LanZouCloud
22

3-
version = '2.6.3'
3+
version = '2.6.4'
44

55
__all__ = ['utils', 'types', 'models', 'LanZouCloud', 'version']

lanzou/api/core.py

Lines changed: 69 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -958,25 +958,8 @@ def down_file_by_url(self, share_url, pwd='', save_path='./Download', *, callbac
958958
if not resp:
959959
return LanZouCloud.FAILED
960960

961-
content_length = resp.headers.get('Content-Length', None)
962-
# 如果无法获取 Content-Length, 先读取一点数据, 再尝试获取一次
963-
# 通常只需读取 1 字节数据
964-
data_iter = resp.iter_content(chunk_size=1)
965-
while not content_length:
966-
logger.warning("Not found Content-Length in response headers")
967-
logger.debug("Read 1 byte from stream...")
968-
try:
969-
next(data_iter)
970-
except StopIteration:
971-
logger.debug("Please wait for a moment before downloading")
972-
return LanZouCloud.FAILED
973-
resp_ = self._get(info.durl, stream=True)
974-
if not resp_:
975-
return LanZouCloud.FAILED
976-
content_length = resp_.headers.get('Content-Length', None)
977-
logger.debug(f"Content-Length: {content_length}")
978-
979-
total_size = int(content_length)
961+
# 如果本地存在同名文件且设置了 overwrite, 则覆盖原文件
962+
# 否则修改下载文件路径, 自动在文件名后加序号
980963
file_path = save_path + os.sep + info.name
981964
if os.path.exists(file_path):
982965
if overwrite:
@@ -989,9 +972,33 @@ def down_file_by_url(self, share_url, pwd='', save_path='./Download', *, callbac
989972
tmp_file_path = file_path + '.download' # 正在下载中的文件名
990973
logger.debug(f'Save file to {tmp_file_path}')
991974

975+
# 对于 txt 文件, 可能出现没有 Content-Length 的情况
976+
# 此时文件需要下载一次才会出现 Content-Length
977+
# 这时候我们先读取一点数据, 再尝试获取一次, 通常只需读取 1 字节数据
978+
content_length = resp.headers.get('Content-Length', None)
979+
if not content_length:
980+
data_iter = resp.iter_content(chunk_size=1)
981+
max_retries = 5 # 5 次拿不到就算了
982+
while not content_length and max_retries > 0:
983+
max_retries -= 1
984+
logger.warning("Not found Content-Length in response headers")
985+
logger.debug("Read 1 byte from stream...")
986+
try:
987+
next(data_iter) # 读取一个字节
988+
except StopIteration:
989+
logger.debug("Please wait for a moment before downloading")
990+
return LanZouCloud.FAILED
991+
resp_ = self._get(info.durl, stream=True) # 再请求一次试试
992+
if not resp_:
993+
return LanZouCloud.FAILED
994+
content_length = resp_.headers.get('Content-Length', None)
995+
logger.debug(f"Content-Length: {content_length}")
996+
997+
if not content_length:
998+
return LanZouCloud.FAILED # 应该不会出现这种情况
999+
1000+
# 支持断点续传下载
9921001
now_size = 0
993-
chunk_size = 4096
994-
last_512_bytes = b'' # 用于识别文件是否携带真实文件名信息
9951002
if os.path.exists(tmp_file_path):
9961003
now_size = os.path.getsize(tmp_file_path) # 本地已经下载的文件大小
9971004
headers = {**self._headers, 'Range': 'bytes=%d-' % now_size}
@@ -1004,30 +1011,43 @@ def down_file_by_url(self, share_url, pwd='', save_path='./Download', *, callbac
10041011

10051012
with open(tmp_file_path, "ab") as f:
10061013
file_name = os.path.basename(file_path)
1007-
for chunk in resp.iter_content(chunk_size):
1014+
for chunk in resp.iter_content(4096):
10081015
if chunk:
10091016
f.write(chunk)
10101017
f.flush()
10111018
now_size += len(chunk)
1012-
if total_size - now_size < 512:
1013-
last_512_bytes += chunk
10141019
if callback is not None:
1015-
callback(file_name, total_size, now_size)
1020+
callback(file_name, int(content_length), now_size)
1021+
1022+
# 文件下载完成后, 检查文件尾部 512 字节数据
1023+
# 绕过官方限制上传时, API 会隐藏文件真实信息到文件尾部
1024+
# 这里尝试提取隐藏信息, 并截断文件尾部数据
10161025
os.rename(tmp_file_path, file_path) # 下载完成,改回正常文件名
1017-
# 尝试解析文件报尾
1018-
file_info = un_serialize(last_512_bytes[-512:])
1019-
if file_info is not None and 'padding' in file_info: # 大文件的记录文件也可以反序列化出 name,但是没有 padding
1020-
real_name = file_info['name'] # 解除伪装的真实文件名
1021-
logger.debug(f"Find meta info: real_name={real_name}")
1022-
real_path = save_path + os.sep + real_name
1023-
if overwrite and os.path.exists(real_path):
1024-
os.remove(real_path) # 删除原文件
1025-
new_file_path = auto_rename(real_path)
1026-
os.rename(file_path, new_file_path)
1027-
with open(new_file_path, 'rb+') as f:
1028-
f.seek(-512, 2) # 截断最后 512 字节数据
1029-
f.truncate()
1030-
file_path = new_file_path # 保存文件重命名后真实路径
1026+
if os.path.getsize(file_path) > 512: # 文件大于 512 bytes 就检查一下
1027+
file_info = None
1028+
with open(file_path, 'rb') as f:
1029+
f.seek(-512, os.SEEK_END)
1030+
last_512_bytes = f.read()
1031+
file_info = un_serialize(last_512_bytes)
1032+
1033+
# 大文件的记录文件也可以反序列化出 name,但是没有 padding 字段
1034+
if file_info is not None and 'padding' in file_info:
1035+
real_name = file_info['name'] # 解除伪装的真实文件名
1036+
logger.debug(f"Find meta info: real_name={real_name}")
1037+
real_path = save_path + os.sep + real_name
1038+
# 如果存在同名文件且设置了 overwrite, 删掉原文件
1039+
if overwrite and os.path.exists(real_path):
1040+
os.remove(real_path)
1041+
# 自动重命名, 文件存在就会加个序号
1042+
new_file_path = auto_rename(real_path)
1043+
os.rename(file_path, new_file_path)
1044+
# 截断最后 512 字节隐藏信息, 还原文件
1045+
with open(new_file_path, 'rb+') as f:
1046+
f.seek(-512, os.SEEK_END)
1047+
f.truncate()
1048+
file_path = new_file_path # 保存文件重命名后真实路径
1049+
1050+
# 如果设置了下载完成的回调函数, 调用之
10311051
if downloaded_handler is not None:
10321052
downloaded_handler(os.path.abspath(file_path))
10331053
return LanZouCloud.SUCCESS
@@ -1054,6 +1074,15 @@ def get_folder_info_by_url(self, share_url, dir_pwd='') -> FolderDetail:
10541074
# 要求输入密码, 用户描述中可能带有"输入密码",所以不用这个字符串判断
10551075
if ('id="pwdload"' in html or 'id="passwddiv"' in html) and len(dir_pwd) == 0:
10561076
return FolderDetail(LanZouCloud.LACK_PASSWORD)
1077+
1078+
if "acw_sc__v2" in html:
1079+
# 在页面被过多访问或其他情况下,有时候会先返回一个加密的页面,其执行计算出一个acw_sc__v2后放入页面后再重新访问页面才能获得正常页面
1080+
# 若该页面进行了js加密,则进行解密,计算acw_sc__v2,并加入cookie
1081+
acw_sc__v2 = calc_acw_sc__v2(html)
1082+
self._session.cookies.set("acw_sc__v2", acw_sc__v2)
1083+
logger.debug(f"Set Cookie: acw_sc__v2={acw_sc__v2}")
1084+
html = self._get(share_url).text # 文件分享页面(第一页)
1085+
10571086
try:
10581087
# 获取文件需要的参数
10591088
html = remove_notes(html)
@@ -1144,6 +1173,7 @@ def _check_big_file(self, file_list):
11441173
logger.debug(f"Big file checking: Failed")
11451174
return None
11461175
resp = self._get(info.durl)
1176+
# 这里无需知道 txt 文件的 Content-Length, 全部读取即可
11471177
info = un_serialize(resp.content) if resp else None
11481178
if info is not None: # 确认是大文件
11491179
name, size, *_, parts = info.values() # 真实文件名, 文件字节大小, (其它数据),分段数据文件名(有序)

0 commit comments

Comments (0)