CoinMarketCap.py
Browse files- coinmarketDataset.py +90 -0
coinmarketDataset.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import logging
|
3 |
+
import pandas as pd
|
4 |
+
import numpy as np
|
5 |
+
from datetime import datetime
|
6 |
+
from .creator import create_dataset
|
7 |
+
|
8 |
+
logger = logging.getLogger(__name__)
|
9 |
+
|
10 |
+
|
11 |
+
class CoinMarketDataset:
    """Windowed training dataset built from CoinMarketCap daily BTC/USD OHLCV history.

    Downloads historical bitcoin quotes from CoinMarketCap's (undocumented) web
    API, derives a per-day ``Mean`` price, restricts rows to the requested
    ``[start_date, end_date]`` span, and feeds the resulting array to
    ``create_dataset`` to produce sliding windows of length ``window_size``.
    """

    # Kept for backward compatibility with existing callers that read the
    # class attribute; __init__ always rebinds self.dataset per instance.
    dataset = []

    # CoinMarketCap historical OHLCV endpoint (undocumented web API —
    # NOTE(review): may change or be rate-limited without notice).
    _API_URL = "https://web-api.coinmarketcap.com/v1/cryptocurrency/ohlcv/historical"

    def __init__(self, main_features, start_date=None, end_date=None, window_size=10):
        """Fetch, clean and window the price history.

        Parameters
        ----------
        main_features : iterable of str
            Subset of {'High', 'Low', 'Close', 'Open', 'Volume', 'Mean'} to
            keep as model features; all other candidates are dropped.
        start_date, end_date : str, '-1', or None
            'YYYY-MM-DD HH:MM:SS' strings bounding the slice. '-1' or None
            means "use the first/last available date" (None previously
            crashed; it is now treated like '-1').
        window_size : int
            Look-back window length passed to ``create_dataset``.
        """
        df = self._fetch_history()
        df = self._keep_features(df, main_features)

        # Resolve sentinel/None bounds to the data's edge dates.
        start_date = self._parse_bound(start_date, df.iloc[0].Date)
        end_date = self._parse_bound(end_date, df.iloc[-1].Date)
        start_index, end_index = self._bound_indices(df, start_date, end_date)

        # NOTE(review): the slice excludes end_index itself (the first row
        # >= end_date), matching the original behavior — confirm intended.
        dates = df.Date[start_index:end_index]
        df = df.drop('Date', axis=1)
        arr = np.array(df)[start_index:end_index]

        self.dataset, self.profit_calculator = create_dataset(
            arr, list(dates), look_back=window_size, features=df.columns)

    def _fetch_history(self):
        """Download daily BTC/USD OHLCV quotes and return a tidy DataFrame.

        Returns a frame with columns Date, Low, High, Open, Close, Volume,
        Mean, free of NaNs and with a contiguous 0..n-1 index.
        """
        import requests  # local import, preserved from the original code

        params = {
            "convert": "USD",
            "slug": "bitcoin",
            # Epoch seconds: roughly 2013-04-28 through 2022-12-30.
            "time_end": "1672384689",
            "time_start": "1367107200",
        }
        content = requests.get(url=self._API_URL, params=params).json()
        df = pd.json_normalize(content['data']['quotes'])

        # Flatten the nested quote fields into plain column names; strip the
        # timezone so comparisons against naive strptime() results work.
        df['Date'] = pd.to_datetime(df['quote.USD.timestamp']).dt.tz_localize(None)
        df['Low'] = df['quote.USD.low']
        df['High'] = df['quote.USD.high']
        df['Open'] = df['quote.USD.open']
        df['Close'] = df['quote.USD.close']
        df['Volume'] = df['quote.USD.volume']

        # Drop the raw API columns now that they are renamed.
        df = df.drop(columns=['time_open', 'time_close', 'time_high', 'time_low',
                              'quote.USD.low', 'quote.USD.high', 'quote.USD.open',
                              'quote.USD.close', 'quote.USD.volume',
                              'quote.USD.market_cap', 'quote.USD.timestamp'])

        # Day-wise mid price between the daily extremes.
        df['Mean'] = (df['Low'] + df['High']) / 2

        # Drop incomplete rows, then renumber: _bound_indices looks rows up
        # by label (df.Date[i]), so index gaps left by dropna would KeyError.
        return df.dropna().reset_index(drop=True)

    @staticmethod
    def _keep_features(df, main_features):
        """Drop every candidate feature column not listed in main_features."""
        drop_cols = [col for col in ('High', 'Low', 'Close', 'Open', 'Volume', 'Mean')
                     if col not in main_features]
        return df.drop(drop_cols, axis=1)

    @staticmethod
    def _parse_bound(value, fallback):
        """Resolve one date bound.

        None or the '-1' sentinel yields *fallback* (an edge date from the
        data); anything else is parsed as 'YYYY-MM-DD HH:MM:SS'.
        """
        if value is None or value == '-1':
            return fallback
        return datetime.strptime(str(value), '%Y-%m-%d %H:%M:%S')

    @staticmethod
    def _bound_indices(df, start_date, end_date):
        """Return (start_index, end_index) row positions bracketing the span.

        start_index is the last row with Date <= start_date (0 if none);
        end_index is the first row with Date >= end_date (last row if none).
        """
        start_index = 0
        end_index = df.shape[0] - 1
        for i in range(df.shape[0]):
            if df.Date[i] <= start_date:
                start_index = i
        for i in range(df.shape[0] - 1, -1, -1):
            if df.Date[i] >= end_date:
                end_index = i
        return start_index, end_index

    def get_dataset(self):
        """Return the (dataset, profit_calculator) pair built in __init__."""
        return self.dataset, self.profit_calculator