-
Notifications
You must be signed in to change notification settings - Fork 74
Expand file tree
/
Copy pathfile.py
More file actions
196 lines (160 loc) · 5.38 KB
/
file.py
File metadata and controls
196 lines (160 loc) · 5.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
from io import BufferedReader
from llmengine.api_engine import DEFAULT_TIMEOUT, APIEngine
from llmengine.data_types import (
DeleteFileResponse,
GetFileContentResponse,
GetFileResponse,
ListFilesResponse,
UploadFileResponse,
)
class File(APIEngine):
    """
    Client for the File API.

    The File API uploads private files to LLM engine so that fine-tunes can access them for training and validation data.
    Besides uploading, it offers helpers to get, list, and delete files, and to get the contents of a file.
    """

    @classmethod
    def upload(cls, file: BufferedReader) -> UploadFileResponse:
        """
        Upload a file to LLM engine.

        For use in [FineTune creation](./#llmengine.fine_tuning.FineTune.create), this should be a CSV file with two columns: `prompt` and `response`.
        A maximum of 100,000 rows of data is currently supported.

        Args:
            file (`BufferedReader`):
                A local file opened with `open(file_path, "r")`

        Returns:
            UploadFileResponse: an object that contains the ID of the uploaded file

        === "Uploading file in Python"
            ```python
            from llmengine import File

            response = File.upload(open("training_dataset.csv", "r"))

            print(response.json())
            ```

        === "Response in JSON"
            ```json
            {
                "id": "file-abc123"
            }
            ```
        """
        # NOTE(review): the parameter is annotated `BufferedReader`, which is what
        # a binary-mode `open(..., "rb")` returns, while the documented example
        # opens the file in text mode ("r") — confirm which one is intended.
        raw = cls.post_file(
            resource_name="v1/files",
            files={"file": file},  # the form field is named "file"
            timeout=DEFAULT_TIMEOUT,
        )
        return UploadFileResponse.parse_obj(raw)

    @classmethod
    def get(cls, file_id: str) -> GetFileResponse:
        """
        Fetch the metadata of a file, including filename and size.

        Args:
            file_id (`str`):
                ID of the file

        Returns:
            GetFileResponse: an object that contains the ID, filename, and size of the requested file

        === "Getting metadata about file in Python"
            ```python
            from llmengine import File

            response = File.get(
                file_id="file-abc123",
            )

            print(response.json())
            ```

        === "Response in JSON"
            ```json
            {
                "id": "file-abc123",
                "filename": "training_dataset.csv",
                "size": 100
            }
            ```
        """
        endpoint = f"v1/files/{file_id}"
        raw = cls._get(endpoint, timeout=DEFAULT_TIMEOUT)
        return GetFileResponse.parse_obj(raw)

    @classmethod
    def list(cls) -> ListFilesResponse:
        """
        List the metadata of all files, e.g. their filenames and sizes.

        Returns:
            ListFilesResponse: an object that contains a list of all files and their filenames and sizes

        === "Listing files in Python"
            ```python
            from llmengine import File

            response = File.list()

            print(response.json())
            ```

        === "Response in JSON"
            ```json
            {
                "files": [
                    {
                        "id": "file-abc123",
                        "filename": "training_dataset.csv",
                        "size": 100
                    },
                    {
                        "id": "file-def456",
                        "filename": "validation_dataset.csv",
                        "size": 50
                    }
                ]
            }
            ```
        """
        # NOTE(review): this call passes a literal timeout of 30 whereas every
        # other method in the class passes DEFAULT_TIMEOUT — confirm that the
        # difference is deliberate before unifying them.
        raw = cls._get("v1/files", timeout=30)
        return ListFilesResponse.parse_obj(raw)

    @classmethod
    def delete(cls, file_id: str) -> DeleteFileResponse:
        """
        Delete a file.

        Args:
            file_id (`str`):
                ID of the file

        Returns:
            DeleteFileResponse: an object that contains whether the deletion was successful

        === "Deleting file in Python"
            ```python
            from llmengine import File

            response = File.delete(file_id="file-abc123")

            print(response.json())
            ```

        === "Response in JSON"
            ```json
            {
                "deleted": true
            }
            ```
        """
        endpoint = f"v1/files/{file_id}"
        raw = cls._delete(endpoint, timeout=DEFAULT_TIMEOUT)
        return DeleteFileResponse.parse_obj(raw)

    @classmethod
    def download(cls, file_id: str) -> GetFileContentResponse:
        """
        Fetch the contents of a file, as a string. (If the uploaded file is in binary, a string encoding will be returned.)

        Args:
            file_id (`str`):
                ID of the file

        Returns:
            GetFileContentResponse: an object that contains the ID and content of the file

        === "Getting file content in Python"
            ```python
            from llmengine import File

            response = File.download(file_id="file-abc123")

            print(response.json())
            ```

        === "Response in JSON"
            ```json
            {
                "id": "file-abc123",
                "content": "Hello world!"
            }
            ```
        """
        endpoint = f"v1/files/{file_id}/content"
        raw = cls._get(endpoint, timeout=DEFAULT_TIMEOUT)
        return GetFileContentResponse.parse_obj(raw)