Skip to content

Commit 8b5d1db

Browse files
authored
Releases v0.12.0 (#251)
1 parent ea82bb7 commit 8b5d1db

File tree

298 files changed

+18891
-13259
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

298 files changed

+18891
-13259
lines changed

License

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -233,7 +233,7 @@ BSD 3-Clause
233233
BSD 2-Clause
234234
------------
235235

236-
- python-tblib:1.3.2
236+
- python-tblib:3.0.0
237237

238238

239239
MIT License

MANIFEST.in

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
recursive-include odps/static *.*
2-
recursive-include odps/internal/static *.*
32
prune odps/static/ui/node_modules
43
include requirements.txt
54
global-include odps/**/*.yml

benchmarks/perf_storage_api_arrow.py

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,12 +14,13 @@
1414

1515
import logging
1616
import sys
17-
import time
1817
import threading
18+
import time
1919

2020
import pytest
2121

2222
from odps.apis.storage_api.conftest import storage_api_client # noqa: F401
23+
2324
if sys.version_info[0] == 3:
2425
from odps.apis.storage_api import *
2526
else:
@@ -87,7 +88,10 @@ def test_read_thread(storage_api_client):
8788
global global_total_record
8889
read_performance_threads = []
8990
for i in range(0, thread_num):
90-
read_performance_thread = threading.Thread(target=read_performance, args=[storage_api_client,])
91+
read_performance_thread = threading.Thread(
92+
target=read_performance,
93+
args=[storage_api_client],
94+
)
9195
read_performance_threads.append(read_performance_thread)
9296

9397
start = time.time()
@@ -104,7 +108,10 @@ def test_read_thread(storage_api_client):
104108
time.sleep(1)
105109
now = time.time()
106110
now_count = global_total_record
107-
logger.info("index: %d, read, %f records per second" % (count, (now_count - start_count) / (now - start)))
111+
logger.info(
112+
"index: %d, read, %f records per second"
113+
% (count, (now_count - start_count) / (now - start))
114+
)
108115

109116
if judge and cal_count < 5:
110117
cal_total_count += (now_count - start_count) / (now - start)

benchmarks/perf_tabletunnel.py

Lines changed: 35 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
# limitations under the License.
1616

1717
from __future__ import print_function
18+
1819
import cProfile
1920
import json
2021
import os
@@ -26,12 +27,14 @@
2627

2728
if bool(json.loads(os.getenv("FORCE_PY", "0"))):
2829
from odps import options
30+
2931
options.force_py = True
3032

33+
from datetime import datetime
34+
3135
from odps.compat import Decimal
3236
from odps.conftest import odps, tunnel # noqa: F401
3337
from odps.models import TableSchema
34-
from datetime import datetime
3538

3639
# remember to reset False before committing
3740
ENABLE_PROFILE = bool(json.loads(os.getenv("ENABLE_PROFILE", "0")))
@@ -40,14 +43,16 @@
4043
COMPRESS_DATA = True
4144
BUFFER_SIZE = 1024 * 1024
4245
DATA_AMOUNT = 100000
43-
STRING_LITERAL = "Soft kitty, warm kitty, little ball of fur; happy kitty, sleepy kitty, purr, purr"
46+
STRING_LITERAL = (
47+
"Soft kitty, warm kitty, little ball of fur; happy kitty, sleepy kitty, purr, purr"
48+
)
4449
NUMERIC_ONLY = bool(json.loads(os.getenv("NUMERIC_ONLY", "0")))
4550

4651

4752
@pytest.fixture
4853
def schema():
49-
fields = ['a', 'b', 'c', 'd', 'e', 'f']
50-
types = ['bigint', 'double', 'datetime', 'boolean', 'string', 'decimal']
54+
fields = ["a", "b", "c", "d", "e", "f"]
55+
types = ["bigint", "double", "datetime", "boolean", "string", "decimal"]
5156
return TableSchema.from_lists(fields, types)
5257

5358

@@ -61,52 +66,54 @@ def profiled():
6166
finally:
6267
if ENABLE_PROFILE:
6368
if DUMP_PROFILE:
64-
pr.dump_stats('profile.out')
69+
pr.dump_stats("profile.out")
6570
p = Stats(pr)
6671
p.strip_dirs()
67-
p.sort_stats('time')
72+
p.sort_stats("time")
6873
p.print_stats(40)
69-
p.print_callees('types.py:846\(validate_value', 20)
70-
p.print_callees('types.py:828\(_validate_primitive_value', 20)
71-
p.print_callees('tabletunnel.py:185\(write', 20)
74+
p.print_callees("types.py:846\(validate_value", 20)
75+
p.print_callees("types.py:828\(_validate_primitive_value", 20)
76+
p.print_callees("tabletunnel.py:185\(write", 20)
7277

7378

7479
def test_write(odps, schema, tunnel):
75-
table_name = 'pyodps_test_tunnel_write_performance'
80+
table_name = "pyodps_test_tunnel_write_performance"
7681
odps.create_table(table_name, schema, if_not_exists=True)
7782
ss = tunnel.create_upload_session(table_name)
7883
r = ss.new_record()
7984

8085
start = time.time()
8186
with ss.open_record_writer(0) as writer, profiled():
8287
for i in range(DATA_AMOUNT):
83-
r[0] = 2**63-1
88+
r[0] = 2**63 - 1
8489
r[1] = 0.0001
8590
r[2] = datetime(2015, 11, 11) if not NUMERIC_ONLY else None
8691
r[3] = True
8792
r[4] = STRING_LITERAL if not NUMERIC_ONLY else None
88-
r[5] = Decimal('3.15') if not NUMERIC_ONLY else None
93+
r[5] = Decimal("3.15") if not NUMERIC_ONLY else None
8994
writer.write(r)
9095
n_bytes = writer.n_bytes
91-
print(n_bytes, 'bytes', float(n_bytes) / 1024 / 1024 / (time.time() - start), 'MiB/s')
96+
print(
97+
n_bytes, "bytes", float(n_bytes) / 1024 / 1024 / (time.time() - start), "MiB/s"
98+
)
9299
ss.commit([0])
93100
odps.delete_table(table_name, if_exists=True)
94101

95102

96103
def test_read(odps, schema, tunnel):
97-
table_name = 'pyodps_test_tunnel_read_performance'
104+
table_name = "pyodps_test_tunnel_read_performance"
98105
odps.delete_table(table_name, if_exists=True)
99106
t = odps.create_table(table_name, schema)
100107

101108
def gen_data():
102109
for i in range(DATA_AMOUNT):
103110
r = t.new_record()
104-
r[0] = 2 ** 63 - 1
111+
r[0] = 2**63 - 1
105112
r[1] = 0.0001
106113
r[2] = datetime(2015, 11, 11) if not NUMERIC_ONLY else None
107114
r[3] = True
108115
r[4] = STRING_LITERAL if not NUMERIC_ONLY else None
109-
r[5] = Decimal('3.15') if not NUMERIC_ONLY else None
116+
r[5] = Decimal("3.15") if not NUMERIC_ONLY else None
110117
yield r
111118

112119
odps.write_table(t, gen_data())
@@ -119,28 +126,34 @@ def gen_data():
119126
for _ in reader:
120127
cnt += 1
121128
n_bytes = reader.n_bytes
122-
print(n_bytes, 'bytes', float(n_bytes) / 1024 / 1024 / (time.time() - start), 'MiB/s')
129+
print(
130+
n_bytes, "bytes", float(n_bytes) / 1024 / 1024 / (time.time() - start), "MiB/s"
131+
)
123132
assert DATA_AMOUNT == cnt
124133
odps.delete_table(table_name, if_exists=True)
125134

126135

127136
def test_buffered_write(odps, schema, tunnel):
128-
table_name = 'test_tunnel_bufferred_write'
137+
table_name = "test_tunnel_bufferred_write"
129138
odps.create_table(table_name, schema, if_not_exists=True)
130139
ss = tunnel.create_upload_session(table_name)
131140
r = ss.new_record()
132141

133142
start = time.time()
134-
with ss.open_record_writer(buffer_size=BUFFER_SIZE, compress=COMPRESS_DATA) as writer:
143+
with ss.open_record_writer(
144+
buffer_size=BUFFER_SIZE, compress=COMPRESS_DATA
145+
) as writer:
135146
for i in range(DATA_AMOUNT):
136-
r[0] = 2**63-1
147+
r[0] = 2**63 - 1
137148
r[1] = 0.0001
138149
r[2] = datetime(2015, 11, 11) if not NUMERIC_ONLY else None
139150
r[3] = True
140151
r[4] = STRING_LITERAL if not NUMERIC_ONLY else None
141-
r[5] = Decimal('3.15') if not NUMERIC_ONLY else None
152+
r[5] = Decimal("3.15") if not NUMERIC_ONLY else None
142153
writer.write(r)
143154
n_bytes = writer.n_bytes
144-
print(n_bytes, 'bytes', float(n_bytes) / 1024 / 1024 / (time.time() - start), 'MiB/s')
155+
print(
156+
n_bytes, "bytes", float(n_bytes) / 1024 / 1024 / (time.time() - start), "MiB/s"
157+
)
145158
ss.commit(writer.get_blocks_written())
146159
odps.delete_table(table_name, if_exists=True)

benchmarks/perf_types.py

Lines changed: 16 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -21,60 +21,64 @@
2121

2222
import pytest
2323

24-
from odps.models import TableSchema, Record
24+
from odps.models import Record, TableSchema
2525

2626
COMPRESS_DATA = True
2727
BUFFER_SIZE = 1024 * 1024
2828
DATA_AMOUNT = 100000
29-
STRING_LITERAL = "Soft kitty, warm kitty, little ball of fur; happy kitty, sleepy kitty, purr, purr"
29+
STRING_LITERAL = (
30+
"Soft kitty, warm kitty, little ball of fur; happy kitty, sleepy kitty, purr, purr"
31+
)
3032

3133

3234
@pytest.fixture
3335
def schema():
3436
pr = cProfile.Profile()
3537
pr.enable()
36-
fields = ['bigint', 'double', 'datetime', 'boolean', 'string', 'decimal']
37-
types = ['bigint', 'double', 'datetime', 'boolean', 'string', 'decimal']
38+
fields = ["bigint", "double", "datetime", "boolean", "string", "decimal"]
39+
types = ["bigint", "double", "datetime", "boolean", "string", "decimal"]
3840
try:
39-
yield TableSchema.from_lists(fields, types)
41+
schema = TableSchema.from_lists(fields, types)
42+
schema.build_snapshot()
43+
yield schema
4044
finally:
4145
p = Stats(pr)
4246
p.strip_dirs()
43-
p.sort_stats('cumtime')
47+
p.sort_stats("cumtime")
4448
p.print_stats(40)
4549

4650

4751
def test_set_record_field_bigint(schema):
4852
r = Record(schema=schema)
4953
for i in range(10**6):
50-
r['bigint'] = 2**63-1
54+
r["bigint"] = 2**63 - 1
5155

5256

5357
def test_set_record_field_double(schema):
5458
r = Record(schema=schema)
5559
for i in range(10**6):
56-
r['double'] = 0.0001
60+
r["double"] = 0.0001
5761

5862

5963
def test_set_record_field_boolean(schema):
6064
r = Record(schema=schema)
6165
for i in range(10**6):
62-
r['boolean'] = False
66+
r["boolean"] = False
6367

6468

6569
def test_set_record_field_string(schema):
6670
r = Record(schema=schema)
6771
for i in range(10**6):
68-
r['string'] = STRING_LITERAL
72+
r["string"] = STRING_LITERAL
6973

7074

7175
def test_write_set_record_field_datetime(schema):
7276
r = Record(schema=schema)
7377
for i in range(10**6):
74-
r['datetime'] = datetime(2016, 1, 1)
78+
r["datetime"] = datetime(2016, 1, 1)
7579

7680

7781
def test_set_record_field_decimal(schema):
7882
r = Record(schema=schema)
7983
for i in range(10**6):
80-
r['decimal'] = Decimal('1.111111')
84+
r["decimal"] = Decimal("1.111111")

0 commit comments

Comments
 (0)