TrendSeeker AI

TrendSeeker tracks real-time trends, automates Twitter updates, allows customizable analysis, and provides a web app with a chatbot. Future plans include token trading and DeFi actions. It's open-source for community collaboration. For additional info, check out our [GitHub].

=====================================================

> [Twitter_Snippet.code]

from requests_html import HTMLSession, HTML

from lxml.etree import ParserError

session = HTMLSession()

class Profile:
    """
    Parse a Twitter profile page and expose the extracted fields as attributes.

    Attributes:
        - name
        - username
        - birthday
        - location
        - biography
        - website
        - profile_photo
        - banner_photo
        - likes_count
        - tweets_count
        - followers_count
        - following_count
        - is_verified
        - is_private
        - user_id
    """

    def __init__(self, username):
        """Fetch ``https://twitter.com/<username>`` and parse the response.

        Raises:
            ValueError: If the returned page cannot be turned into an HTML tree.
        """
        headers = {
            "Accept": "application/json, text/javascript, */*; q=0.01",
            "Referer": f"https://twitter.com/{username}",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8",
            "X-Twitter-Active-User": "yes",
            "X-Requested-With": "XMLHttpRequest",
            "Accept-Language": "en-US",
        }
        page = session.get(f"https://twitter.com/{username}", headers=headers)
        self.username = username
        self.__parse_profile(page)

    @staticmethod
    def _text_or_none(html, selector):
        """Return the text of the first element matching *selector*, or None if it is empty."""
        return html.find(selector)[0].text or None

    @staticmethod
    def _count_or_none(html, kind):
        """Return the integer ``data-count`` of the ``--<kind>`` nav item, or None if unavailable."""
        try:
            raw = html.find(f'li[class*="--{kind}"] span[data-count]')[0].attrs["data-count"]
            return int(raw)
        except (IndexError, KeyError, ValueError):
            # Element missing, attribute missing, or value not numeric.
            return None

    def __parse_profile(self, page):
        """Populate the profile attributes from the fetched *page*.

        Raises:
            ValueError: If the page text cannot be parsed.
        """
        try:
            html = HTML(html=page.text, url="bunk", default_encoding="utf-8")
        except KeyError:
            raise ValueError(
                f'Oops! Either "{self.username}" does not exist or is private.'
            )
        except ParserError as err:
            # Previously swallowed, which left `html` unbound and surfaced
            # later as an unrelated NameError.
            raise ValueError(
                f'Oops! Could not parse the profile page of "{self.username}".'
            ) from err

        # A badge is present exactly when the selector matches something.
        self.is_private = bool(html.find(".ProfileHeaderCard-badges .Icon--protected"))
        self.is_verified = bool(html.find(".ProfileHeaderCard-badges .Icon--verified"))

        self.location = self._text_or_none(html, ".ProfileHeaderCard-locationText")

        birthday = html.find(".ProfileHeaderCard-birthdateText")[0].text
        self.birthday = birthday.replace("Born ", "") if birthday else None

        self.profile_photo = html.find(".ProfileAvatar-image")[0].attrs["src"]

        try:
            self.banner_photo = html.find(".ProfileCanopy-headerBg img")[0].attrs["src"]
        except (IndexError, KeyError):
            # No banner element (IndexError) or no `src` on it (KeyError).
            self.banner_photo = None

        page_title = html.find("title")[0].text
        # partition() keeps the whole title when it contains no "(";
        # slicing with find() == -1 would silently drop the last character.
        self.name = page_title.partition("(")[0].strip()

        self.user_id = html.find(".ProfileNav")[0].attrs["data-user-id"]

        # Empty biography is normalised to None (the original re-checked
        # `birthday` here by mistake).
        self.biography = self._text_or_none(html, ".ProfileHeaderCard-bio")
        self.website = self._text_or_none(html, ".ProfileHeaderCard-urlText")

        # Counters are optional on the page; each falls back to None.
        self.tweets_count = self._count_or_none(html, "tweets")
        self.following_count = self._count_or_none(html, "following")
        self.followers_count = self._count_or_none(html, "followers")
        self.likes_count = self._count_or_none(html, "favorites")

    def to_dict(self):
        """Return all profile attributes as a plain dict."""
        return dict(
            name=self.name,
            username=self.username,
            birthday=self.birthday,
            biography=self.biography,
            location=self.location,
            website=self.website,
            profile_photo=self.profile_photo,
            banner_photo=self.banner_photo,
            likes_count=self.likes_count,
            tweets_count=self.tweets_count,
            followers_count=self.followers_count,
            following_count=self.following_count,
            is_verified=self.is_verified,
            is_private=self.is_private,
            user_id=self.user_id,
        )

    def __dir__(self):
        """List only the public profile attribute names."""
        return [
            "name",
            "username",
            "birthday",
            "location",
            "biography",
            "website",
            "profile_photo",
            "banner_photo",
            "likes_count",
            "tweets_count",
            "followers_count",
            "following_count",
            "is_verified",
            "is_private",
            "user_id",
        ]

    def __repr__(self):
        return f"<profile {self.username}@twitter>"

from requests_html import HTML, HTMLSession
session = HTMLSession()

def get_trends():
    """Return the trend names listed by Twitter's trends endpoint.

    Requests ``https://twitter.com/i/trends``, reads the ``module_html``
    field of the JSON response, and collects the ``data-trend-name``
    attribute of every ``<li>`` element in that fragment.

    Returns:
        list: Trend names in document order.

    Raises:
        KeyError: If the response has no ``module_html`` field or an
            ``<li>`` element lacks ``data-trend-name``.
    """
    headers = {
        "X-Twitter-Active-User": "yes",
        "X-Requested-With": "XMLHttpRequest",
        "Accept-Language": "en-US",
    }
    response = session.get("https://twitter.com/i/trends", headers=headers)
    html = HTML(
        html=response.json()["module_html"], url="bunk", default_encoding="utf-8"
    )
    return [item.attrs["data-trend-name"] for item in html.find("li")]

import re
from datetime import datetime
from urllib.parse import quote

from lxml.etree import ParserError
from requests_html import HTML, HTMLSession
session = HTMLSession()

def get_tweets(query, pages=25):
    """Gets tweets for a given user or hashtag, via the Twitter frontend API.

    Args:
        query: A screen name, or a hashtag (any string starting with ``#``),
            which is URL-quoted and sent to the search timeline instead of
            the profile timeline.
        pages: Maximum number of timeline pages to request.

    Yields:
        dict: One entry per tweet with the keys ``tweetId``, ``tweetUrl``,
        ``username``, ``userId``, ``isRetweet``, ``isPinned``, ``time``,
        ``text``, ``replies``, ``retweets``, ``likes`` and ``entries``
        (``hashtags``, ``urls``, ``photos``, ``videos``).

    Raises:
        ValueError: If a response carries no ``items_html`` field.
    """
    # NOTE(review): selectors such as `.x-hashtag` and `a.x-timeline-link`
    # look like the result of a global "twitter" -> "x" rename; confirm they
    # match the class names in the served markup.
    after_part = "include_available_features=1&include_entities=1&include_new_items_bar=true"
    if query.startswith("#"):
        query = quote(query)
        url = f"https://x.com/i/search/timeline?f=tweets&vertical=default&q={query}&src=tyah&reset_error_state=false&"
    else:
        url = f"https://x.com/i/profiles/show/{query}/timeline/tweets?"
    url += after_part

    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "Referer": f"https://x.com/{query}",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8",
        "X-Twitter-Active-User": "yes",
        "X-Requested-With": "XMLHttpRequest",
        "Accept-Language": "en-US",
    }

    def leading_number(label):
        """Return the first space-separated token of *label* without ',' or '.'."""
        return label.split(" ")[0].replace(",", "").replace(".", "")

    def gen_tweets(pages):
        request = session.get(url + "&max_position", headers=headers)

        while pages > 0:
            try:
                json_response = request.json()
                html = HTML(
                    html=json_response["items_html"],
                    url="bunk",
                    default_encoding="utf-8",
                )
            except KeyError:
                raise ValueError(
                    f'Oops! Either "{query}" does not exist or is private.'
                )
            except ParserError:
                break

            stream_items = html.find(".stream-item")
            if not stream_items:
                # Nothing left on the timeline. The original indexed [-1]
                # here into an unused variable and crashed with IndexError.
                break

            tweets = []
            for tweet, profile in zip(
                stream_items, html.find(".js-profile-popup-actionable")
            ):
                # 10~11 html elements have `.stream-item` class and also their
                # `data-item-type` is `tweet` but their content doesn't look
                # like a tweet's content
                try:
                    text = tweet.find(".tweet-text")[0].full_text
                except IndexError:  # issue #50
                    continue

                tweet_id = tweet.attrs["data-item-id"]
                tweet_url = profile.attrs["data-permalink-path"]
                username = profile.attrs["data-screen-name"]
                user_id = profile.attrs["data-user-id"]
                is_pinned = bool(tweet.find("div.pinned"))

                posted_at = datetime.fromtimestamp(
                    int(tweet.find("._timestamp")[0].attrs["data-time-ms"]) / 1000.0
                )

                interactions = [
                    x.text for x in tweet.find(".ProfileTweet-actionCount")
                ]
                # Each counter falls back to later entries of `interactions`
                # when the leading token of the primary entry is empty.
                replies = int(leading_number(interactions[0]) or interactions[3])
                retweets = int(
                    leading_number(interactions[1])
                    or interactions[4]
                    or interactions[5]
                )
                likes = int(
                    leading_number(interactions[2])
                    or interactions[6]
                    or interactions[7]
                )

                hashtags = [
                    hashtag_node.full_text
                    for hashtag_node in tweet.find(".x-hashtag")
                ]

                urls = [
                    url_node.attrs["data-expanded-url"]
                    for url_node in (
                        tweet.find("a.x-timeline-link:not(.u-hidden)")
                        + tweet.find(
                            "[class='js-tweet-text-container'] a[data-expanded-url]"
                        )
                    )
                ]
                urls = list(set(urls))  # delete duplicated elements

                photos = [
                    photo_node.attrs["data-image-url"]
                    for photo_node in tweet.find(".AdaptiveMedia-photoContainer")
                ]

                is_retweet = bool(
                    tweet.find(".js-stream-tweet")[0].attrs.get("data-retweet-id", None)
                )

                videos = []
                for node in tweet.find(".PlayableMedia-player"):
                    for style in node.attrs["style"].split():
                        if not style.startswith("background"):
                            continue
                        # The video id is the thumbnail file name without
                        # its .jpg/.png extension.
                        tmp = style.split("/")[-1]
                        if ".jpg" in tmp:
                            video_id = tmp[: tmp.index(".jpg")]
                        elif ".png" in tmp:
                            video_id = tmp[: tmp.index(".png")]
                        else:
                            video_id = None
                        videos.append({"id": video_id})

                tweets.append(
                    {
                        "tweetId": tweet_id,
                        "tweetUrl": tweet_url,
                        "username": username,
                        "userId": user_id,
                        "isRetweet": is_retweet,
                        "isPinned": is_pinned,
                        "time": posted_at,
                        "text": text,
                        "replies": replies,
                        "retweets": retweets,
                        "likes": likes,
                        "entries": {
                            "hashtags": hashtags,
                            "urls": urls,
                            "photos": photos,
                            "videos": videos,
                        },
                    }
                )

            for tweet in tweets:
                # Insert a space before the first glued-on link / pic reference.
                tweet["text"] = re.sub(
                    r"(\S)http", r"\g<1> http", tweet["text"], count=1
                )
                tweet["text"] = re.sub(
                    r"(\S)pic\.x", r"\g<1> pic.x", tweet["text"], count=1
                )
                yield tweet

            request = session.get(
                url,
                params={"max_position": json_response["min_position"]},
                headers=headers,
            )
            pages -= 1

    yield from gen_tweets(pages)

# For searching:
#
# https://x.com/i/search/timeline?vertical=default&q=foof&src=typd&composed_count=0&include_available_features=1&include_entities=1&include_new_items_bar=true&interval=30000&latent_count=0
#
# Replace 'foof' with your query string. Not sure how to decode yet, but it seems to work.

[TWITTER CODE]

> [TikTok_Snippet.code]

/**
 * Shared constants and small helpers for the scraper.
 */
export = {
    // Supported scrape types.
    scrape: [
        'user',
        'hashtag',
        'trend',
        'music',
        'discover_user',
        'discover_hashtag',
        'discover_music',
        'history',
        'video',
        'from-file',
        'userprofile',
    ],
    chronologicalTypes: ['user'],
    history: ['user', 'hashtag', 'trend', 'music'],
    requiredSession: ['user', 'hashtag', 'trend', 'music'],
    sourceType: {
        user: 8,
        music: 11,
        trend: 12,
    },
    /**
     * verifyFp is used to bypass captcha
     * Currently this method is with hardcoded values
     * later I or someone else will implement proper way to generate valid value
     *
     * NOTE: `variants` is empty here, so this currently returns `undefined`.
     */
    verifyFp: () => {
        const variants = [];
        return variants[Math.floor(Math.random() * variants.length)];
    },
    /**
     * Generate a random (fake) user-agent: a random OS string from the list
     * below combined with randomised Chrome version numbers.
     */
    userAgent: () => {
        const os = [
            'Macintosh; Intel Mac OS X 10_15_7',
            'Macintosh; Intel Mac OS X 10_15_5',
            'Macintosh; Intel Mac OS X 10_11_6',
            'Macintosh; Intel Mac OS X 10_6_6',
            'Macintosh; Intel Mac OS X 10_9_5',
            'Macintosh; Intel Mac OS X 10_10_5',
            'Macintosh; Intel Mac OS X 10_7_5',
            'Macintosh; Intel Mac OS X 10_11_3',
            'Macintosh; Intel Mac OS X 10_10_3',
            'Macintosh; Intel Mac OS X 10_6_8',
            'Macintosh; Intel Mac OS X 10_10_2',
            'Macintosh; Intel Mac OS X 10_10_3',
            'Macintosh; Intel Mac OS X 10_11_5',
            'Windows NT 10.0; Win64; x64',
            'Windows NT 10.0; WOW64',
            'Windows NT 10.0',
        ];

        return `Mozilla/5.0 (${os[Math.floor(Math.random() * os.length)]}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/${Math.floor(
            Math.random() * 3,
        ) + 87}.0.${Math.floor(Math.random() * 190) + 4100}.${Math.floor(Math.random() * 50) + 140} Safari/537.36`;
    },
};

/* eslint-disable class-methods-use-this */

/* eslint-disable no-param-reassign */

/* eslint-disable consistent-return */

/* eslint-disable no-console */

import request, { OptionsWithUri, CookieJar } from 'request';

import rp from 'request-promise';

import { Agent } from 'http';

import { createWriteStream, writeFile } from 'fs';

import { fromCallback } from 'bluebird';

import archiver from 'archiver';

import { SocksProxyAgent } from 'socks-proxy-agent';

import { forEachLimit } from 'async';

import { MultipleBar } from '../helpers';

import { DownloaderConstructor, PostCollector, DownloadParams, Proxy, Headers } from '../types';

/**
 * Downloads post videos either into memory, into a folder, or into a zip archive.
 */
export class Downloader {
    public progress: boolean;

    public mbars: MultipleBar;

    public progressBar: any[];

    private proxy: string[] | string;

    public noWaterMark: boolean;

    public filepath: string;

    public bulk: boolean;

    public headers: Headers;

    public cookieJar: CookieJar;

    constructor({ progress, proxy, noWaterMark, headers, filepath, bulk, cookieJar }: DownloaderConstructor) {
        // NOTE(review): `true || progress` always evaluates to `true`, so the
        // `progress` option is effectively ignored — confirm whether intended.
        this.progress = true || progress;
        this.progressBar = [];
        this.noWaterMark = noWaterMark;
        this.headers = headers;
        this.filepath = filepath;
        this.mbars = new MultipleBar();
        this.proxy = proxy;
        this.bulk = bulk;
        this.cookieJar = cookieJar;
    }

    /**
     * Get proxy.
     * For an array of proxies a random entry is picked; a `socks4://` or
     * `socks5://` string is wrapped in a SocksProxyAgent.
     */
    private get getProxy(): Proxy {
        if (Array.isArray(this.proxy)) {
            const selectProxy = this.proxy.length ? this.proxy[Math.floor(Math.random() * this.proxy.length)] : '';
            return {
                socks: false,
                proxy: selectProxy,
            };
        }
        if (this.proxy.indexOf('socks4://') > -1 || this.proxy.indexOf('socks5://') > -1) {
            return {
                socks: true,
                proxy: new SocksProxyAgent(this.proxy as string),
            };
        }
        return {
            socks: false,
            proxy: this.proxy as string,
        };
    }

    /**
     * Add new bar to indicate download progress
     * @param {boolean} type true when downloading without watermark (label only)
     * @param {number} len total size the bar represents
     */
    public addBar(type: boolean, len: number): any[] {
        this.progressBar.push(
            this.mbars.newBar(`Downloading (${!type ? 'WITH WM' : 'WITHOUT WM'}) :id [:bar] :percent`, {
                complete: '=',
                incomplete: ' ',
                width: 30,
                total: len,
            }),
        );

        return this.progressBar[this.progressBar.length - 1];
    }

    /**
     * Convert video file to the buffer
     * @param {*} item
     */
    public toBuffer(item: PostCollector): Promise<Buffer> {
        return new Promise((resolve, reject) => {
            const proxy = this.getProxy;
            let r = request;
            let barIndex;
            // Collect chunks and join once at the end; re-concatenating the
            // whole buffer on every chunk is quadratic in the file size.
            const chunks: Buffer[] = [];
            if (proxy.proxy && !proxy.socks) {
                r = request.defaults({ proxy: `http://${proxy.proxy}/` });
            }
            if (proxy.proxy && proxy.socks) {
                r = request.defaults({ agent: (proxy.proxy as unknown) as Agent });
            }
            r.get({
                url: item.videoUrlNoWaterMark ? item.videoUrlNoWaterMark : item.videoUrl,
                headers: this.headers,
                jar: this.cookieJar,
            })
                .on('response', response => {
                    const len = parseInt(response.headers['content-length'] as string, 10);
                    if (this.progress && !this.bulk && len) {
                        barIndex = this.addBar(!!item.videoUrlNoWaterMark, len);
                    }
                    if (this.progress && !this.bulk && !len) {
                        console.log(`Empty response! You can try again with a proxy! Can't download video: ${item.id}`);
                    }
                })
                .on('data', chunk => {
                    if (chunk.length) {
                        chunks.push(chunk as Buffer);
                        if (this.progress && !this.bulk && barIndex && barIndex.hasOwnProperty('tick')) {
                            barIndex.tick(chunk.length, { id: item.id });
                        }
                    }
                })
                .on('end', () => {
                    resolve(Buffer.concat(chunks));
                })
                .on('error', () => {
                    reject(new Error(`Cant download video: ${item.id}. If you were using proxy, please try without it.`));
                });
        });
    }

    /**
     * Download posts
     * if {zip} is true then zip the result else save posts to the {folder}
     */
    public downloadPosts({ zip, folder, collector, fileName, asyncDownload }: DownloadParams) {
        return new Promise((resolve, reject) => {
            const saveDestination = zip ? `${fileName}.zip` : folder;
            const archive = archiver('zip', {
                gzip: true,
                zlib: { level: 9 },
            });
            if (zip) {
                const output = createWriteStream(saveDestination);
                archive.pipe(output);
            }

            forEachLimit(
                collector,
                asyncDownload,
                (item: PostCollector, cb) => {
                    this.toBuffer(item)
                        .then(async buffer => {
                            if (buffer.length) {
                                item.downloaded = true;
                                if (zip) {
                                    archive.append(buffer, { name: `${item.id}.mp4` });
                                } else {
                                    await fromCallback(cback => writeFile(`${saveDestination}/${item.id}.mp4`, buffer, cback));
                                }
                            } else {
                                item.downloaded = false;
                            }
                            cb(null);
                        })
                        .catch(() => {
                            // A failed download only marks the item; it does not abort the batch.
                            item.downloaded = false;
                            cb(null);
                        });
                },
                error => {
                    if (error) {
                        return reject(error);
                    }

                    if (zip) {
                        archive.finalize();
                        archive.on('end', () => resolve(''));
                    } else {
                        resolve('');
                    }
                },
            );
        });
    }

    /**
     * Download single video without the watermark
     * (falls back to `post.videoUrl` when no watermark-free URL is set)
     * @param post
     */
    public async downloadSingleVideo(post: PostCollector) {
        const proxy = this.getProxy;
        let url = post.videoUrlNoWaterMark;
        if (!url) {
            url = post.videoUrl;
        }
        const options = ({
            uri: url,
            method: 'GET',
            jar: this.cookieJar,
            headers: this.headers,
            encoding: null,
            ...(proxy.proxy && proxy.socks ? { agent: proxy.proxy } : {}),
            ...(proxy.proxy && !proxy.socks ? { proxy: `http://${proxy.proxy}/` } : {}),
        } as unknown) as OptionsWithUri;

        const result = await rp(options);

        await fromCallback(cb => writeFile(`${this.filepath}/${post.id}.mp4`, result, cb));
    }
}

import fs from 'fs';

import { ScrapeType, Result, RequestQuery, UserMetadata, PostCollector, HashtagMetadata } from '../types';

import { TikTokScraper } from './TikTok';

import CONST from '../constant';

jest.mock('request-promise-native');

jest.mock('request-promise');

// Promise API, valid user input: constructor wiring, user-id lookup and result size.
describe('TikTok Scraper MODULE(promise): user(valid input data)', () => {
    let instance;
    beforeAll(() => {
        instance = new TikTokScraper({
            download: false,
            asyncDownload: 5,
            asyncScraping: 3,
            filetype: '',
            filepath: '',
            input: 'tiktok',
            noWaterMark: false,
            type: 'user',
            headers: {
                'user-agent': 'Custom user-agent',
            },
            proxy: '',
            number: 5,
        });
    });

    it('user input should not be empty', async () => {
        expect(instance).toBeInstanceOf(TikTokScraper);
        expect(instance.input).toContain('tiktok');
    });

    it('set custom user-agent', async () => {
        expect(instance).toBeInstanceOf(TikTokScraper);
        expect(instance.headers['user-agent']).toContain('Custom user-agent');
    });

    it('getUserId should return a valid Object', async () => {
        const userId: RequestQuery = await instance.getUserId();
        expect(userId).toEqual({
            id: '107955',
            lang: '',
            maxCursor: 0,
            minCursor: 0,
            secUid: '',
            sourceType: 8,
            verifyFp: '',
        });
    });

    it('result should contain array value with the length 5', async () => {
        const posts: Result = await instance.scrape();
        expect(posts.collector.length).toEqual(5);
    });
});

// Event API, valid user input: a completed scrape must emit "done".
describe('TikTok Scraper MODULE(event): user(valid input data)', () => {
    let instance;
    beforeAll(() => {
        instance = new TikTokScraper({
            download: false,
            asyncDownload: 5,
            asyncScraping: 5,
            filetype: '',
            filepath: '',
            input: 'tiktok',
            type: 'user',
            headers: {
                'user-agent': 'Custom user-agent',
            },
            proxy: '',
            number: 1,
            event: true,
        });
    });

    it('result should emit "done" event if task was completed', done => {
        instance.on('done', data => {
            expect(data).toEqual('completed');
            done();
        });
        instance.scrape();
    });
});

// Promise API, invalid input: scrape() must reject with a descriptive message.
describe('TikTok Scraper MODULE(promise): user(invalid input data)', () => {
    it('Throw error if username is empty', async () => {
        const instance = new TikTokScraper({
            download: false,
            asyncDownload: 5,
            asyncScraping: 5,
            filetype: '',
            filepath: '',
            input: '',
            type: 'user',
            headers: {
                'user-agent': 'okhttp',
            },
            proxy: '',
            number: 5,
        });
        // `.rejects` returns a promise; without `await` the test would finish
        // before the assertion runs and pass vacuously.
        await expect(instance.scrape()).rejects.toEqual('Missing input');
    });

    it('Throw error if wrong scraping type was provided', async () => {
        const instance = new TikTokScraper({
            download: false,
            asyncDownload: 5,
            asyncScraping: 5,
            filetype: '',
            filepath: '',
            input: '',
            type: 'fake' as ScrapeType,
            headers: {
                'user-agent': 'okhttp',
            },
            proxy: '',
            number: 5,
        });
        await expect(instance.scrape()).rejects.toEqual(`Missing scraping type. Scrape types: ${CONST.scrape} `);
    });
});

// Event API, invalid input: failures must surface through the "error" event.
describe('TikTok Scraper MODULE(event): user(invalid input data)', () => {
    it('Throw error if username is empty', done => {
        const instance = new TikTokScraper({
            download: false,
            asyncDownload: 5,
            asyncScraping: 5,
            filetype: '',
            filepath: '',
            input: '',
            type: 'user',
            headers: {
                'user-agent': 'okhttp',
            },
            proxy: '',
            number: 1,
            event: true,
        });
        instance.on('error', data => {
            expect(data).toEqual('Missing input');
            done();
        });
        instance.scrape();
    });

    it('Throw error if wrong scraping type was provided', done => {
        const instance = new TikTokScraper({
            download: false,
            asyncDownload: 5,
            asyncScraping: 5,
            filetype: '',
            filepath: '',
            input: '',
            type: 'fake' as ScrapeType,
            headers: {
                'user-agent': 'okhttp',
            },
            proxy: '',
            number: 5,
            event: true,
        });
        instance.on('error', data => {
            expect(data).toEqual(`Missing scraping type. Scrape types: ${CONST.scrape} `);
            done();
        });
        instance.scrape();
    });
});

// Promise API with filetype "all": results are written as csv and json.
describe('TikTok Scraper MODULE(promise): user(save to a file)', () => {
    let instance;
    let posts: Result;
    beforeAll(async () => {
        // Stub writeFile so nothing touches the disk; the callback reports success.
        jest.spyOn(fs, 'writeFile').mockImplementation((file, option, cb) => cb(null));

        instance = new TikTokScraper({
            download: false,
            asyncDownload: 5,
            asyncScraping: 5,
            filetype: 'all',
            filepath: '',
            input: 'tiktok',
            type: 'user',
            headers: {
                'user-agent': 'okhttp',
            },
            proxy: '',
            number: 5,
        });

        posts = await instance.scrape();
    });

    afterAll(() => {
        jest.restoreAllMocks();
    });

    it('fs.WriteFile should be called 2 times. Save to a csv and json', async () => {
        expect(fs.writeFile).toHaveBeenCalledTimes(2);
    });

    it('result should contain a valid file names for the csv and json files', async () => {
        expect(posts.csv).toMatch(/^(\w+)_([0-9]{13}).csv$/);
        expect(posts.json).toMatch(/^(\w+)_([0-9]{13}).json$/);
    });
});

// Promise API, valid hashtag input: constructor wiring and hashtag-id lookup.
describe('TikTok Scraper MODULE(promise): hashtag(valid input data)', () => {
    let instance;
    beforeAll(() => {
        instance = new TikTokScraper({
            download: false,
            asyncDownload: 5,
            asyncScraping: 5,
            filetype: '',
            filepath: '',
            input: 'summer',
            type: 'hashtag',
            headers: {
                'user-agent': 'okhttp',
            },
            proxy: '',
            number: 5,
        });
    });

    it('hashtag input should not be empty', async () => {
        expect(instance).toBeInstanceOf(TikTokScraper);
        expect(instance.input).toContain('summer');
    });

    it('getHashTagId should return a valid Object', async () => {
        const hashtag: RequestQuery = await instance.getHashTagId();
        expect(hashtag).toEqual({ aid: 1988, challengeID: '99770', count: 30, cursor: 0, user_agent: 'okhttp', verifyFp: '' });
    });

    // it('result should contain array value with the length 5', async () => {
    //     const posts: Result = await instance.scrape();
    //     expect(posts.collector.length).toEqual(5);
    // });
});

// signUrl(): returns a signature for a URL and rejects when the URL is empty.
describe('TikTok Scraper MODULE(promise): signUrl', () => {
    let instance;
    beforeAll(() => {
        instance = new TikTokScraper({
            download: false,
            asyncDownload: 5,
            asyncScraping: 5,
            filetype: '',
            filepath: '',
            input: 'https://m.tiktok.com/share/item/list?secUid=&id=355503&type=3&count=30&minCursor=0&maxCursor=0&shareUid=&lang=',
            type: 'signature',
            headers: {
                'user-agent': 'okhttp',
            },
            proxy: '',
            number: 5,
        });
    });

    it('signUrl should return a valid signature', async () => {
        const signature: string = await instance.signUrl();
        expect(signature).not.toBeNull();
    });

    it('Throw error if input url is empty', async () => {
        instance.input = '';
        await expect(instance.signUrl()).rejects.toBe(`Url is missing`);
    });
});

// getHashtagInfo(): returns hashtag metadata and rejects on empty/unknown hashtags.
describe('TikTok Scraper MODULE(promise): getHashtagInfo', () => {
    let instance;
    const hasthagName = 'summer';
    beforeAll(() => {
        instance = new TikTokScraper({
            download: false,
            asyncDownload: 5,
            asyncScraping: 5,
            filetype: '',
            filepath: '',
            input: hasthagName,
            type: 'single_hashtag',
            headers: {
                'user-agent': 'okhttp',
            },
            proxy: '',
            number: 5,
        });
    });

    it('getHashtagInfo should return a valid Object', async () => {
        const hashtag: HashtagMetadata = await instance.getHashtagInfo();
        // NOTE(review): the closing braces of this expected object were missing;
        // `challenge` is assumed to end after `isCommerce`, with `stats`,
        // `shareMeta` and `challengeAnnouncement` as its siblings — confirm
        // against the HashtagMetadata type.
        expect(hashtag).toEqual({
            challenge: {
                id: '99770',
                title: 'duett',
                desc: 'Habt ihr schon unsere neue Duett-Funktion gecheckt? Oben, unten, links, rechts alles möglich jetzt.',
                profileThumb: 'https://p16-va-default.akamaized.net/obj/musically-maliva-obj/92760d2f9cce09720b20ae060081efc8',
                profileMedium: 'https://p16-va-default.akamaized.net/obj/musically-maliva-obj/92760d2f9cce09720b20ae060081efc8',
                profileLarger: 'https://p16-va-default.akamaized.net/obj/musically-maliva-obj/92760d2f9cce09720b20ae060081efc8',
                coverThumb: 'https://p16-va-default.akamaized.net/obj/musically-maliva-obj/fa5fcd3ee0a9581fc26d9e3b811e428e',
                coverMedium: 'https://p16-va-default.akamaized.net/obj/musically-maliva-obj/fa5fcd3ee0a9581fc26d9e3b811e428e',
                coverLarger: 'https://p16-va-default.akamaized.net/obj/musically-maliva-obj/fa5fcd3ee0a9581fc26d9e3b811e428e',
                isCommerce: false,
            },
            stats: { videoCount: 0, viewCount: 37100000000 },
            shareMeta: { title: '#duett on TikTok', desc: '37099.0m views - Watch awesome short videos created with trending hashtag #duett' },
            challengeAnnouncement: {},
        });
    });

    it('Throw error if input hashtag is empty', async () => {
        instance.input = '';
        await expect(instance.getHashtagInfo()).rejects.toBe(`Hashtag is missing`);
    });

    it(`Throw error if hashtag doesn't exist`, async () => {
        instance.input = 'na';
        await expect(instance.getHashtagInfo()).rejects.toBe(`Can't find hashtag: na`);
    });
});

// getUserProfileInfo(): returns user metadata and rejects on an empty username.
describe('TikTok Scraper MODULE(promise): getUserProfileInfo', () => {
    let instance;
    const userName = 'tiktok';
    beforeAll(() => {
        instance = new TikTokScraper({
            download: false,
            asyncDownload: 5,
            asyncScraping: 5,
            filetype: '',
            filepath: '',
            input: userName,
            type: 'single_user',
            headers: {
                'user-agent': 'okhttp',
            },
            proxy: '',
            number: 5,
        });
    });

    it('getUserProfileInfo should return a valid Object', async () => {
        const user: UserMetadata = await instance.getUserProfileInfo();
        // NOTE(review): the closing braces of this expected object were missing;
        // `user` is assumed to end after `signature`, with `stats` and
        // `itemList` as its siblings — confirm against the UserMetadata type.
        expect(user).toEqual({
            user: {
                id: '107955',
                uniqueId: 'tiktok',
                nickname: 'TikTok',
                avatarThumb:
                    'https://p16-sign-va.tiktokcdn.com/musically-maliva-obj/1645136815763462~c5_100x100.jpeg?x-expires=1610028000&x-signature=kEnsi2vZJE9DYy5q3UH%2FKAIH8pI%3D',
                avatarMedium:
                    'https://p16-sign-va.tiktokcdn.com/musically-maliva-obj/1645136815763462~c5_720x720.jpeg?x-expires=1610028000&x-signature=ZcG9nv927kBXHRsEh9ZeFAGjqzM%3D',
                avatarLarger:
                    'https://p16-sign-va.tiktokcdn.com/musically-maliva-obj/1645136815763462~c5_1080x1080.jpeg?x-expires=1610028000&x-signature=44JuBpJgUlN4dau%2B3eFemKgTrJI%3D',
                verified: true,
                createTime: 1425144149,
                secUid: 'MS4wLjABAAAAv7iSuuXDJGDvJkmH_vz1qkDZYo1apxgzaxdBSeIuPiM',
                secret: false,
                ftc: false,
                relation: 1,
                openFavorite: true,
                commentSetting: 0,
                duetSetting: 0,
                stitchSetting: 0,
                privateAccount: false,
                shortId: '0',
                signature: 'It Starts On TikTok',
            },
            stats: { followingCount: 496, followerCount: 50100000, heartCount: 246000000, videoCount: 118, diggCount: 53, heart: 246000000 },
            itemList: [],
        });
    });

    it('Throw error if input username is empty', async () => {
        instance.input = '';
        await expect(instance.getUserProfileInfo()).rejects.toBe(`Username is missing`);
    });
});

// CLI mode with download, zip and history enabled: file reads/writes and the zip name.
describe('TikTok Scraper CLI: user(save progress)', () => {
    let instance;
    let posts: Result;
    beforeAll(async () => {
        // Stub the fs calls so the run neither reads nor writes real files.
        jest.spyOn(fs, 'writeFile').mockImplementation((file, option, cb) => cb(null));
        jest.spyOn(fs, 'readFile').mockImplementation((file, cb) => cb(null, Buffer.from('0')));

        instance = new TikTokScraper({
            download: true,
            cli: true,
            zip: true,
            store_history: true,
            test: true,
            asyncDownload: 5,
            asyncScraping: 5,
            filetype: '',
            filepath: '',
            input: 'tiktok',
            type: 'user',
            headers: {
                'user-agent': 'okhttp',
            },
            proxy: '',
            number: 5,
        });
        posts = await instance.scrape();
    });

    afterAll(() => {
        jest.restoreAllMocks();
    });

    it('fs.readFile should be called 2 times', async () => {
        expect(fs.readFile).toHaveBeenCalledTimes(2);
    });

    it('fs.writeFile should be called 2 times', async () => {
        expect(fs.writeFile).toHaveBeenCalledTimes(2);
    });

    it('result should contain a valid file name for the Zip file', async () => {
        expect(posts.zip).toMatch(/^(\w+)_([0-9]{13}).zip$/);
    });
});

// getVideoMeta(): returns metadata for a video URL and rejects on empty/invalid URLs.
describe('TikTok Scraper MODULE(promise): getVideoMeta', () => {
    let instance;
    beforeEach(() => {
        instance = new TikTokScraper({
            download: false,
            asyncDownload: 5,
            asyncScraping: 5,
            filetype: '',
            filepath: '',
            input: 'https://www.tiktok.com/@tiktok/video/6807491984882765062',
            type: 'video_meta',
            headers: {
                'user-agent': CONST.userAgent(),
            },
            proxy: '',
            number: 5,
            hdVideo: false,
        });
    });

    it('getVideoMeta should return a valid Object', async () => {
        const post: PostCollector = await instance.getVideoMeta();
        // NOTE(review): the closing braces of this expected object were missing.
        // The nesting below (authorMeta ends after `avatar`, musicMeta after
        // its `duration`, videoMeta after `stitchEnabled`, covers after
        // `origin`) is a best-judgment reconstruction — confirm against the
        // PostCollector type.
        expect(post).toEqual({
            id: '6881450806688664838',
            text: 'Good vibes only 🤙 @420doggface208 @mickfleetwood @tomhayes603',
            createTime: 1602212662,
            authorMeta: {
                id: '107955',
                secUid: 'MS4wLjABAAAAv7iSuuXDJGDvJkmH_vz1qkDZYo1apxgzaxdBSeIuPiM',
                nickName: 'TikTok',
                following: 491,
                fans: 48300000,
                heart: 241100000,
                video: 112,
                digg: 35,
                verified: true,
                private: false,
                signature: 'Make Your Day',
                avatar:
                    'https://p16-sign-va.tiktokcdn.com/musically-maliva-obj/1645136815763462~c5_1080x1080.jpeg?x-expires=1603573200&x-signature=4%2FrCxmt8FiH7M9RY%2Bx%2F7WVzd0Og%3D',
            },
            musicMeta: {
                musicId: '6881450829518293766',
                musicName: 'original sound',
                musicAuthor: 'TikTok',
                musicOriginal: true,
                coverThumb:
                    'https://p16-sign-va.tiktokcdn.com/musically-maliva-obj/1645136815763462~c5_100x100.jpeg?x-expires=1603573200&x-signature=XGaOhkftgl2fNr%2BT1OpxPVWUWY4%3D',
                coverMedium:
                    'https://p16-sign-va.tiktokcdn.com/musically-maliva-obj/1645136815763462~c5_720x720.jpeg?x-expires=1603573200&x-signature=bl%2BxXbD9ME6Tt4VNcWtPDAX4PZI%3D',
                coverLarge:
                    'https://p16-sign-va.tiktokcdn.com/musically-maliva-obj/1645136815763462~c5_1080x1080.jpeg?x-expires=1603573200&x-signature=4%2FrCxmt8FiH7M9RY%2Bx%2F7WVzd0Og%3D',
                duration: 15,
            },
            imageUrl:
                'https://p16-sign-sg.tiktokcdn.com/obj/tos-maliva-p-0068/5f1e128e900c4008bd6d612964ef7d1b?x-expires=1603508400&x-signature=lXSV%2BKG4%2B8G%2BGJREfeNEys6m3eg%3D',
            videoUrl:
                'https://v16-web-newkey.tiktokcdn.com/2ea83f8b07e61eb2844a644d0b1ff238/5f939968/video/tos/useast2a/tos-useast2a-pve-0068/2141262fa24c4f7687f2d6b0df121616/?a=1988&br=3316&bt=1658&cr=0&cs=0&cv=1&dr=0&ds=3&er=&l=202010232102490101902192101109C365&lr=tiktok_m&mime_type=video_mp4&qs=0&rc=anFwZTh4N2R3dzMzZzczM0ApNWY0O2QzaDszNzxlOTRlN2dkbzVlbGRkM3NfLS0xMTZzc2EwNC4vLWEuYS5hMmFiMy06Yw%3D%3D&vl=&vr=',
            videoUrlNoWaterMark: '',
            videoApiUrlNoWaterMark: '',
            videoMeta: {
                width: 576,
                height: 1024,
                ratio: '720p',
                duration: 15,
                duetInfo: { duetFromId: '0' },
                duetEnabled: true,
                stitchEnabled: true,
            },
            covers: {
                default:
                    'https://p16-sign-sg.tiktokcdn.com/obj/tos-maliva-p-0068/5f1e128e900c4008bd6d612964ef7d1b?x-expires=1603508400&x-signature=lXSV%2BKG4%2B8G%2BGJREfeNEys6m3eg%3D',
                origin:
                    'https://p16-sign-sg.tiktokcdn.com/obj/tos-maliva-p-0068/fe538f49b1334b75890ea3d741d3e357_1602212663?x-expires=1603508400&x-signature=JlLy1gxqASLp0msjeJSxMEFco7I%3D',
            },
            diggCount: 1300000,
            shareCount: 13100,
            playCount: 25700,
            secretID: 'awesome',
            commentCount: 25700,
            downloaded: false,
            mentions: ['@420doggface208', '@mickfleetwood', '@tomhayes603'],
            hashtags: [],
            effectStickers: [],
        });
    });

    it('Throw error if input url is empty', async () => {
        instance.input = '';
        await expect(instance.getVideoMeta()).rejects.toBe(`Url is missing`);
    });

    it(`Throw error if user has provided incorrect URL`, async () => {
        instance.input = 'na';
        await expect(instance.getVideoMeta()).rejects.toBe(`Can't extract video metadata: na`);
    });
});

/* eslint-disable no-console */

/* eslint-disable no-await-in-loop */

/* eslint-disable no-underscore-dangle */

import rp, { OptionsWithUri } from 'request-promise';

import { CookieJar } from 'request';

import { tmpdir } from 'os';

import { writeFile, readFile, mkdir } from 'fs';

import { Parser } from 'json2csv';

import ora, { Ora } from 'ora';

import { fromCallback } from 'bluebird';

import { EventEmitter } from 'events';

import { SocksProxyAgent } from 'socks-proxy-agent';

import { forEachLimit } from 'async';

import { URLSearchParams } from 'url';

import CONST from '../constant';

import { sign, makeid } from '../helpers';

import {

PostCollector,

ScrapeType,

TikTokConstructor,

Result,

MusicMetadata,

RequestQuery,

History,

Proxy,

FeedItems,

ItemListData,

TikTokMetadata,

UserMetadata,

HashtagMetadata,

Headers,

WebHtmlUserMetadata,

VideoMetadata,

} from '../types';

import { Downloader } from '../core';

export class TikTokScraper extends EventEmitter {

private mainHost: string;

private userIdStore: string;

private download: boolean;

private filepath: string;

private json2csvParser: Parser<any>;

private filetype: string;

private input: string;

private proxy: string[] | string;

private strictSSL: boolean;

private number: number;

private since: number;

private asyncDownload: number;

private asyncScraping: () => number;

private collector: PostCollector[];

private event: boolean;

private scrapeType: ScrapeType;

private cli: boolean;

private spinner: Ora;

private byUserId: boolean;

private storeHistory: boolean;

private historyPath: string;

private idStore: string;

public Downloader: Downloader;

private storeValue: string = '';

private maxCursor: number;

private noWaterMark: boolean;

private noDuplicates: string[];

private timeout: number;

private bulk: boolean;

private validHeaders: boolean;

private csrf: string;

private zip: boolean;

private fileName: string;

private test: boolean;

private hdVideo: boolean;

private webHookUrl: string;

private method: string;

private httpRequests: {

good: number;

bad: number;

};

public headers: Headers;

private sessionList: string[];

private verifyFp: string;

private store: string[];

public cookieJar: CookieJar;

/**
 * Build a scraper from the user supplied options.
 *
 * Inside Docker (SCRAPING_FROM_DOCKER set) every output path is forced to
 * '/usr/app/files'; otherwise `filepath` / `historyPath` are honoured.
 */
constructor({
    download,
    filepath,
    filetype,
    proxy,
    strictSSL = true,
    asyncDownload,
    cli = false,
    event = false,
    progress = false,
    input,
    number,
    since,
    type,
    by_user_id = false,
    store_history = false,
    historyPath = '',
    noWaterMark = false,
    useTestEndpoints = false,
    fileName = '',
    timeout = 0,
    bulk = false,
    zip = false,
    test = false,
    hdVideo = false,
    webHookUrl = '',
    method = 'POST',
    headers,
    verifyFp = '',
    sessionList = [],
}: TikTokConstructor) {
    super();

    const dockerDir = '/usr/app/files';
    const outputDir = process.env.SCRAPING_FROM_DOCKER ? dockerDir : filepath || '';

    // Request / session configuration
    this.userIdStore = '';
    this.verifyFp = verifyFp;
    this.mainHost = useTestEndpoints ? 'https://t.tiktok.com/' : 'https://m.tiktok.com/';
    this.headers = headers;
    this.proxy = proxy;
    this.strictSSL = strictSSL;
    this.csrf = '';
    this.timeout = timeout;
    this.sessionList = sessionList;
    this.validHeaders = false;

    // Output configuration
    this.download = download;
    this.filepath = outputDir;
    this.fileName = fileName;
    this.json2csvParser = new Parser({ flatten: true });
    this.filetype = filetype;
    this.zip = zip;
    this.hdVideo = hdVideo;
    this.noWaterMark = noWaterMark;

    // Scraping task
    this.input = input;
    this.test = test;
    this.number = number;
    this.since = since;
    this.scrapeType = type;
    this.byUserId = by_user_id;
    this.bulk = bulk;

    // Cookie jar. Where all valid cookies will be stored
    this.cookieJar = rp.jar();

    this.asyncDownload = asyncDownload || 5;
    // Every scrape type currently runs one scraping task at a time.
    this.asyncScraping = (): number => 1;

    // Collected state
    this.collector = [];
    this.idStore = '';
    this.maxCursor = 0;
    this.noDuplicates = [];
    this.store = [];

    // CLI / events
    this.event = event;
    this.cli = cli;
    this.spinner = ora({ text: 'TikTok Scraper Started', stream: process.stdout });

    // Download history is only kept for CLI downloads
    this.storeHistory = cli && download && store_history;
    this.historyPath = process.env.SCRAPING_FROM_DOCKER ? dockerDir : historyPath || tmpdir();

    this.Downloader = new Downloader({
        progress,
        cookieJar: this.cookieJar,
        proxy,
        noWaterMark,
        headers,
        filepath: outputDir,
        bulk,
    });

    // Webhook delivery
    this.webHookUrl = webHookUrl;
    this.method = method;
    this.httpRequests = { good: 0, bad: 0 };
}

/**
 * Get file destination (csv, zip, json).
 *
 * Returns the destination path without an extension. The base name is
 * `fileName` when provided, otherwise `<input>_<timestamp>` for the 'user' and
 * 'hashtag' types and `<scrapeType>_<timestamp>` for every other type.
 * When posts are downloaded unzipped the file is placed inside
 * `folderDestination`; otherwise inside `filepath` (or the working directory).
 */
private get fileDestination(): string {
    const usesInputName = this.scrapeType === 'user' || this.scrapeType === 'hashtag';
    const baseName = this.fileName || `${usesInputName ? this.input : this.scrapeType}_${Date.now()}`;

    if (!this.zip && this.download) {
        return `${this.folderDestination}/${baseName}`;
    }
    return this.filepath ? `${this.filepath}/${baseName}` : baseName;
}

/**
 * Get folder destination, where all downloaded posts will be saved.
 *
 * @throws {TypeError} when the scrape type has no folder mapping
 */
private get folderDestination(): string {
    let folder: string;
    switch (this.scrapeType) {
        case 'user':
            folder = this.input;
            break;
        case 'hashtag':
            folder = `#${this.input}`;
            break;
        case 'music':
            folder = `music_${this.input}`;
            break;
        case 'trend':
            folder = `trend`;
            break;
        case 'video':
            folder = `video`;
            break;
        default:
            throw new TypeError(`${this.scrapeType} is not supported`);
    }
    // Prefix with the configured output directory when there is one.
    return this.filepath ? `${this.filepath}/${folder}` : folder;
}

/**
 * Get api endpoint for the current scrape type.
 *
 * @throws {TypeError} when the scrape type has no item-list endpoint
 */
private get getApiEndpoint(): string {
    switch (this.scrapeType) {
        case 'user':
            return `${this.mainHost}api/post/item_list/`;
        case 'trend':
            return `${this.mainHost}api/recommend/item_list/`;
        case 'hashtag':
            return `${this.mainHost}api/challenge/item_list/`;
        case 'music':
            return `${this.mainHost}api/music/item_list/`;
        default:
            throw new TypeError(`${this.scrapeType} is not supported`);
    }
}

/**
 * Get proxy.
 *
 * Picks a random entry when `proxy` is a list, or uses the single configured
 * value. socks4:// and socks5:// proxies are wrapped in a SocksProxyAgent;
 * anything else is returned as a plain string. An unset proxy or an empty list
 * yields `{ socks: false, proxy: '' }`.
 */
private get getProxy(): Proxy {
    let proxy = '';
    if (Array.isArray(this.proxy)) {
        // An empty array is truthy, so it must not fall through as the proxy value itself.
        if (this.proxy.length) {
            proxy = this.proxy[Math.floor(Math.random() * this.proxy.length)];
        }
    } else if (this.proxy) {
        proxy = this.proxy;
    }

    if (!proxy) {
        return {
            socks: false,
            proxy: '',
        };
    }

    if (proxy.indexOf('socks4://') > -1 || proxy.indexOf('socks5://') > -1) {
        return {
            socks: true,
            proxy: new SocksProxyAgent(proxy),
        };
    }

    return {
        socks: false,
        proxy,
    };
}

/**
 * Main request method.
 *
 * Sends the request through the shared cookie jar (and proxy, if configured),
 * then waits `this.timeout` ms before handing the result back.
 *
 * @param {OptionsWithUri} options request-promise options
 * @param {boolean} bodyOnly resolve with the response body (default) or the full response
 * @returns the response body, or the full response when `bodyOnly` is false
 * @throws whatever request-promise rejects with, or an Error when a HEAD
 *         response carries no 'x-ware-csrf-token' header
 */
private async request<T>(
    { uri, method, qs, body, form, headers, json, gzip, followAllRedirects, simple = true }: OptionsWithUri,
    bodyOnly = true,
): Promise<T> {
    const proxy = this.getProxy;
    const options = ({
        jar: this.cookieJar,
        uri,
        method,
        ...(qs ? { qs } : {}),
        ...(body ? { body } : {}),
        ...(form ? { form } : {}),
        headers: {
            ...this.headers,
            ...headers,
            ...(this.csrf ? { 'x-secsdk-csrf-token': this.csrf } : {}),
        },
        ...(json ? { json: true } : {}),
        ...(gzip ? { gzip: true } : {}),
        resolveWithFullResponse: true,
        followAllRedirects: followAllRedirects || false,
        simple,
        ...(proxy.proxy && proxy.socks ? { agent: proxy.proxy } : {}),
        ...(proxy.proxy && !proxy.socks ? { proxy: `http://${proxy.proxy}/` } : {}),
        ...(this.strictSSL === false ? { rejectUnauthorized: false } : {}),
        timeout: 10000,
    } as unknown) as OptionsWithUri;

    // Use a random session cookie from the provided list, if any.
    const session = this.sessionList[Math.floor(Math.random() * this.sessionList.length)];
    if (session) {
        this.cookieJar.setCookie(session, 'https://tiktok.com');
    }

    /**
     * Set tt_webid_v2 cookie to access video url
     */
    const cookies = this.cookieJar.getCookieString('https://tiktok.com');
    if (cookies.indexOf('tt_webid_v2') === -1) {
        this.cookieJar.setCookie(`tt_webid_v2=69${makeid(17)}; Domain=tiktok.com; Path=/; Secure; hostOnly=false`, 'https://tiktok.com');
    }

    const response = await rp(options);

    // Extract valid csrf token
    if (options.method === 'HEAD') {
        const csrf = response.headers['x-ware-csrf-token'];
        if (typeof csrf !== 'string') {
            // Fail with a readable message instead of "cannot read property 'split' of undefined".
            throw new Error(`Response is missing the 'x-ware-csrf-token' header`);
        }
        this.csrf = csrf.split(',')[1] || '';
    }

    // Throttle: hand the result back only after the configured delay.
    await new Promise(resume => setTimeout(resume, this.timeout));
    return bodyOnly ? response.body : response;
}

/**
 * Report an error raised before scraping starts.
 *
 * Stops the CLI spinner (unless running in bulk mode), then either emits the
 * error as an 'error' event (event mode) or throws it.
 *
 * @param error value to emit or throw
 */
private returnInitError(error) {
    if (this.cli && !this.bulk) {
        this.spinner.stop();
    }
    if (this.event) {
        this.emit('error', error);
    } else {
        throw error;
    }
}

/**
 * Initiate scraping process.
 *
 * Validates the configuration, runs the main collection loop and then, unless
 * running in event mode, post-processes the collected posts (history filter,
 * watermark-free urls, file output, webhook delivery).
 *
 * @returns in event mode the result of emitting 'done'; otherwise an object
 *          with the request headers, the collector and the written file paths
 */
// eslint-disable-next-line consistent-return
public async scrape(): Promise<Result | any> {
    if (this.cli && !this.bulk) {
        this.spinner.start();
    }

    if (this.download && !this.zip) {
        try {
            await fromCallback(cb => mkdir(this.folderDestination, { recursive: true }, cb));
        } catch (error) {
            return this.returnInitError(error.message);
        }
    }

    if (!this.scrapeType || CONST.scrape.indexOf(this.scrapeType) === -1) {
        return this.returnInitError(`Missing scraping type. Scrape types: ${CONST.scrape} `);
    }
    if (this.scrapeType !== 'trend' && !this.input) {
        return this.returnInitError('Missing input');
    }

    await this.mainLoop();

    // In event mode every post was already emitted; nothing is saved.
    if (this.event) {
        return this.emit('done', 'completed');
    }

    if (this.storeHistory) {
        await this.getDownloadedVideosFromHistory();
    }

    if (this.noWaterMark) {
        await this.withoutWatermark();
    }

    const [json, csv, zip] = await this.saveCollectorData();

    if (this.storeHistory) {
        // We need to make sure that we save data only about downloaded videos
        this.collector.forEach(item => {
            if (this.store.indexOf(item.id) === -1 && item.downloaded) {
                this.store.push(item.id);
            }
        });
        await this.storeDownloadProgress();
    }

    if (this.webHookUrl) {
        await this.sendDataToWebHookUrl();
    }

    return {
        headers: { ...this.headers, cookie: this.cookieJar.getCookieString('https://tiktok.com') },
        collector: this.collector,
        ...(this.download ? { zip } : {}),
        ...(this.filetype === 'all' ? { json, csv } : {}),
        ...(this.filetype === 'json' ? { json } : {}),
        ...(this.filetype === 'csv' ? { csv } : {}),
        ...(this.webHookUrl ? { webhook: this.httpRequests } : {}),
    };
}

/**
 * Extract uniq video id and create the url to the video without the watermark.
 *
 * Fills `videoApiUrlNoWaterMark` and `videoUrlNoWaterMark` on every collected
 * post. Rejects with "Can't extract unique video id" if any post fails.
 */
private withoutWatermark() {
    return new Promise((resolve, reject) => {
        forEachLimit(
            this.collector,
            // NOTE(review): forEachLimit requires a concurrency limit as its second
            // argument; the value was absent from this copy of the code — confirm 5.
            5,
            async (item: PostCollector) => {
                try {
                    item.videoApiUrlNoWaterMark = await this.extractVideoId(item);
                    item.videoUrlNoWaterMark = await this.getUrlWithoutTheWatermark(item.videoApiUrlNoWaterMark!);
                } catch {
                    throw new Error(`Can't extract unique video id`);
                }
            },
            err => {
                if (err) {
                    return reject(err);
                }
                resolve(null);
            },
        );
    });
}

/**
 * Extract uniq video id.
 * All videos after July 27 2020 do not store unique video id,
 * it means that we can't extract url to the video without the watermark.
 *
 * @param item collected post; `createTime` and `videoUrl` are read
 * @returns api url of the watermark-free video, or '' when the id is unavailable
 */
// eslint-disable-next-line class-methods-use-this
private async extractVideoId(item: PostCollector): Promise<string> {
    // 1595808000 is the cut-off (seconds) after which the id is no longer embedded.
    if (item.createTime > 1595808000) {
        return '';
    }

    try {
        const result = await rp({
            uri: item.videoUrl,
            headers: this.headers,
        });
        // Build the buffer once and reuse it for both the search and the slice.
        const payload = Buffer.from(result);
        const position = payload.indexOf('vid:');
        if (position !== -1) {
            // The id is taken as the 32 bytes following the 'vid:' marker.
            const id = payload.slice(position + 4, position + 36).toString();
            return `https://api2-16-h2.musical.ly/aweme/v1/play/?video_id=${id}&vr_type=0&is_play_url=1&source=PackSourceEnum_PUBLISH&media_type=4${
                this.hdVideo ? `&ratio=default&improve_bitrate=1` : ''
            }`;
        }
    } catch {
        // continue regardless of error
    }
    return '';
}

/**
 * Get temporary url to the video without the watermark.
 * The url has expiration time (between 5-20 minutes+-).
 *
 * @param uri api url produced by extractVideoId; '' short-circuits to ''
 * @returns the final url reached after following all redirects
 * @throws {Error} when the request fails
 */
private async getUrlWithoutTheWatermark(uri: string): Promise<string> {
    if (!uri) {
        return '';
    }
    const options = {
        uri,
        method: 'GET',
        headers: {
            'user-agent':
                'com.zhiliaoapp.musically/2021600040 (Linux; U; Android 5.0; en_US; SM-N900T; Build/LRX21V; Cronet/TTNetVersion:6c7b701a 2020-04-23 QuicVersion:0144d358 2020-03-24)',
            'sec-fetch-mode': 'navigate',
        },
        followAllRedirects: true,
        // Do not reject on non-2xx statuses; only the final request url matters.
        simple: false,
    };
    try {
        const response: {
            request: { uri: { href: string } };
        } = await this.request(options, false);
        return response.request.uri.href;
    } catch (err) {
        throw new Error(`Can't extract video url without the watermark`);
    }
}

/**
 * Main loop that collects all required metadata from the tiktok web api.
 *
 * Runs up to 1000 page requests with the concurrency returned by
 * `asyncScraping()`. A page that reports "stop" (`true`) ends the loop
 * successfully; any other error rejects the returned promise.
 */
private mainLoop(): Promise<any> {
    // Build the query for the given 1-based page number.
    const buildQuery = async (page: number): Promise<RequestQuery> => {
        switch (this.scrapeType) {
            case 'user': {
                const query = await this.getUserId();
                return { ...query, cursor: this.maxCursor };
            }
            case 'hashtag': {
                const query = await this.getHashTagId();
                return { ...query, cursor: page === 1 ? 0 : (page - 1) * query.count! };
            }
            case 'trend': {
                const query = await this.getTrendingFeedQuery();
                return { ...query };
            }
            case 'music': {
                const query = await this.getMusicFeedQuery();
                return { ...query, cursor: page === 1 ? 0 : (page - 1) * query.count! };
            }
            default:
                // Previously this branch never invoked the callback, so the loop hung forever.
                throw new TypeError(`${this.scrapeType} is not supported`);
        }
    };

    return new Promise((resolve, reject) => {
        const taskArray = Array.from({ length: 1000 }, (v, k) => k + 1);
        forEachLimit(
            taskArray,
            this.asyncScraping(),
            (item, cb) => {
                buildQuery(item)
                    .then(query => this.submitScrapingRequest(query, true))
                    .then(kill => cb(kill || null))
                    .catch(error => cb(error));
            },
            err => {
                // `true` is the "stop scraping" signal, not a failure.
                if (err && err !== true) {
                    return reject(err);
                }
                resolve(null);
            },
        );
    });
}

/**
 * Submit request to the TikTok web API.
 * Collect received metadata.
 *
 * @param query request query for the current page
 * @param updatedApiResponse read posts from `itemList` (true) or `items` (false)
 * @returns true when scraping should stop (no more pages or enough posts), false otherwise
 * @throws {Error} when the API returns a non-zero status or no posts
 */
private async submitScrapingRequest(query: RequestQuery, updatedApiResponse = false): Promise<boolean> {
    try {
        if (!this.validHeaders) {
            /**
             * As of August 13, 2021 the trend api endpoint requires ttwid cookie value that can be extracted by sending GET request to the tiktok trending page
             */
            if (this.scrapeType === 'trend') {
                await this.getValidHeaders(`https://www.tiktok.com/foryou`, false, 'GET');
            }
            this.validHeaders = true;
        }

        const result = await this.scrapeData<ItemListData>(query);
        if (result.statusCode !== 0) {
            throw new Error(`Can't scrape more posts`);
        }

        const { hasMore, maxCursor, cursor } = result;
        const posts = updatedApiResponse ? result.itemList : result.items;
        if (!posts) {
            throw new Error('No more posts');
        }

        const { done } = await this.collectPosts(posts);

        if (!hasMore) {
            console.error(`Only ${this.collector.length} results could be found.`);
            return true;
        }
        if (done) {
            return true;
        }

        // Remember where the next page starts; used by the 'user' scrape type.
        this.maxCursor = parseInt(maxCursor === undefined ? cursor : maxCursor, 10);
        return false;
    } catch (error) {
        throw error.message ? new Error(error.message) : error;
    }
}

/**
 * Store collector data in the CSV and/or JSON files.
 *
 * Downloads the posts first when `download` is enabled (skipped in test mode),
 * then writes the metadata files.
 *
 * @returns `[json, csv, zip]` paths; each is '' when nothing was collected
 */
private async saveCollectorData(): Promise<string[]> {
    if (this.download) {
        if (this.cli) {
            this.spinner.stop();
        }
        if (this.collector.length && !this.test) {
            await this.Downloader.downloadPosts({
                zip: this.zip,
                folder: this.folderDestination,
                collector: this.collector,
                fileName: this.fileDestination,
                asyncDownload: this.asyncDownload,
            });
        }
    }

    let json = '';
    let csv = '';
    let zip = '';

    if (this.collector.length) {
        // fileDestination embeds Date.now() when no file name is set, so it is read once per path.
        json = `${this.fileDestination}.json`;
        csv = `${this.fileDestination}.csv`;
        zip = this.zip ? `${this.fileDestination}.zip` : this.folderDestination;

        await this.saveMetadata({ json, csv });
    }

    if (this.cli) {
        this.spinner.stop();
    }
    return [json, csv, zip];
}

/**
 * Save post metadata.
 *
 * Writes the collector as JSON and/or CSV depending on `filetype`
 * ('json', 'csv' or 'all'); any other value writes nothing.
 *
 * @param param0 `{ json, csv }` destination file paths
 */
public async saveMetadata({ json, csv }) {
    if (!this.collector.length) {
        return;
    }

    const writeJson = () => fromCallback(cb => writeFile(json, JSON.stringify(this.collector), cb));
    const writeCsv = () => fromCallback(cb => writeFile(csv, this.json2csvParser.parse(this.collector), cb));

    switch (this.filetype) {
        case 'json':
            await writeJson();
            break;
        case 'csv':
            await writeCsv();
            break;
        case 'all':
            // Start both writes before awaiting so they actually run concurrently.
            await Promise.all([writeJson(), writeCsv()]);
            break;
        default:
            break;
    }
}

/**
 * If option -s is being used then we need to
 * retrieve already downloaded video id's to prevent them to be downloaded again.
 *
 * Loads the id list from `<historyPath>/<storeValue>.json` (a missing or
 * unreadable file is ignored) and removes every already-stored post from the
 * collector.
 */
private async getDownloadedVideosFromHistory() {
    try {
        const readFromStore = (await fromCallback(cb =>
            readFile(`${this.historyPath}/${this.storeValue}.json`, { encoding: 'utf-8' }, cb),
        )) as string;
        const stored = JSON.parse(readFromStore);
        // Only accept a list of ids; anything else would break the lookups below.
        if (Array.isArray(stored)) {
            this.store = stored;
        }
    } catch {
        // continue regardless of error
    }

    this.collector = this.collector.filter(item => {
        if (this.store.indexOf(item.id) !== -1) {
            item.repeated = true;
        }
        return !item.repeated;
    });
}

/**
 * Store progress to avoid downloading duplicates.
 * Only available from the CLI.
 *
 * Updates the entry for this scrape in `<historyPath>/tiktok_history.json`
 * and writes the downloaded ids to `<historyPath>/<storeValue>.json`.
 * Does nothing when there is no store value or nothing new was downloaded.
 * Write failures are ignored.
 */
private async storeDownloadProgress() {
    const historyType = this.scrapeType === 'trend' ? 'trend' : `${this.scrapeType}_${this.input}`;
    const totalNewDownloadedVideos = this.collector.filter(item => item.downloaded).length;

    if (!this.storeValue || !totalNewDownloadedVideos) {
        return;
    }

    const historyFile = `${this.historyPath}/tiktok_history.json`;
    const storeFile = `${this.historyPath}/${this.storeValue}.json`;

    let history = {} as History;
    try {
        const readFromStore = (await fromCallback(cb => readFile(historyFile, { encoding: 'utf-8' }, cb))) as string;
        history = JSON.parse(readFromStore) || ({} as History);
    } catch {
        // No readable history yet: start from an empty one.
    }

    // A scrape seen for the first time starts counting from zero.
    const alreadyDownloaded = history[historyType] ? history[historyType].downloaded_posts : 0;
    history[historyType] = {
        type: this.scrapeType,
        input: this.input,
        downloaded_posts: alreadyDownloaded + totalNewDownloadedVideos,
        last_change: new Date(),
        file_location: storeFile,
    };

    try {
        await fromCallback(cb => writeFile(storeFile, JSON.stringify(this.store), cb));
    } catch {
        // continue regardless of error
    }

    try {
        await fromCallback(cb => writeFile(historyFile, JSON.stringify(history), cb));
    } catch {
        // continue regardless of error
    }
}

/**
 * Collect post data from the API response.
 *
 * Converts every not-yet-seen post into a PostCollector item and either emits
 * it as a 'data' event (event mode, a placeholder is pushed so the count still
 * grows) or pushes it onto the collector.
 *
 * NOTE(review): the closing braces of the nested object literals were missing
 * in the text this was restored from; the nesting below (authorMeta, musicMeta,
 * covers, videoMeta; counters at the top level) is reconstructed — confirm
 * against the PostCollector type.
 *
 * @param posts feed items returned by the API
 * @returns `{ done }` — true when collection should stop (requested number
 *          reached, or a post older than `since` on a chronological feed)
 */
private collectPosts(posts: FeedItems[]) {
    const result = {
        done: false,
    };

    for (let i = 0; i < posts.length; i += 1) {
        const post = posts[i];

        if (this.since && post.createTime < this.since) {
            // On chronological feeds every following post is older too, so stop;
            // on other feeds just skip this one.
            result.done = CONST.chronologicalTypes.indexOf(this.scrapeType) !== -1;
            if (result.done) {
                break;
            }
            continue;
        }

        if (this.noDuplicates.indexOf(post.id) === -1) {
            this.noDuplicates.push(post.id);
            const item: PostCollector = {
                id: post.id,
                secretID: post.video.id,
                text: post.desc,
                createTime: post.createTime,
                authorMeta: {
                    id: post.author.id,
                    secUid: post.author.secUid,
                    nickName: post.author.nickname,
                    verified: post.author.verified,
                    signature: post.author.signature,
                    avatar: post.author.avatarLarger,
                    following: post.authorStats.followingCount,
                    fans: post.authorStats.followerCount,
                    heart: post.authorStats.heartCount,
                    video: post.authorStats.videoCount,
                    digg: post.authorStats.diggCount,
                },
                ...(post.music
                    ? {
                          musicMeta: {
                              musicId: post.music.id,
                              musicName: post.music.title,
                              musicAuthor: post.music.authorName,
                              musicOriginal: post.music.original,
                              musicAlbum: post.music.album,
                              playUrl: post.music.playUrl,
                              coverThumb: post.music.coverThumb,
                              coverMedium: post.music.coverMedium,
                              coverLarge: post.music.coverLarge,
                              duration: post.music.duration,
                          },
                      }
                    : {}),
                covers: {
                    default: post.video.cover,
                    origin: post.video.originCover,
                    dynamic: post.video.dynamicCover,
                },
                webVideoUrl: `https://www.tiktok.com/@${post.author.uniqueId}/video/${post.id}`,
                videoUrl: post.video.downloadAddr,
                videoUrlNoWaterMark: '',
                videoApiUrlNoWaterMark: '',
                videoMeta: {
                    height: post.video.height,
                    width: post.video.width,
                    duration: post.video.duration,
                },
                diggCount: post.stats.diggCount,
                shareCount: post.stats.shareCount,
                playCount: post.stats.playCount,
                commentCount: post.stats.commentCount,
                downloaded: false,
                mentions: post.desc.match(/(@\w+)/g) || [],
                // NOTE(review): `title` is filled from the challenge `desc`, as in the
                // original code; the challenge's own `title` is not used — confirm intent.
                hashtags: post.challenges
                    ? post.challenges.map(({ id, desc, coverLarger }) => ({
                          id,
                          title: desc,
                          cover: coverLarger,
                      }))
                    : [],
                effectStickers: post.effectStickers
                    ? post.effectStickers.map(({ ID, name }) => ({
                          id: ID,
                          name,
                      }))
                    : [],
            };

            if (this.event) {
                this.emit('data', item);
                // Placeholder keeps collector.length in step with the number of emitted posts.
                this.collector.push({} as PostCollector);
            } else {
                this.collector.push(item);
            }
        }

        if (this.number && this.collector.length >= this.number) {
            result.done = true;
            break;
        }
    }
    return result;
}

/**
 * In order to execute some request, we need to extract valid cookie headers.
 * This request is being executed only once per run.
 *
 * @param url page to request
 * @param signUrl add a `_signature` query parameter computed from the url
 * @param method HTTP method; a 'HEAD' response is also used by `request` to read the csrf token
 * @throws {Error} with the underlying message when the request fails
 */
private async getValidHeaders(url = '', signUrl = true, method = 'HEAD') {
    const options = {
        uri: url,
        method,
        ...(signUrl
            ? {
                  qs: {
                      _signature: sign(url, this.headers['user-agent']),
                  },
              }
            : {}),
        headers: {
            'x-secsdk-csrf-request': 1,
            'x-secsdk-csrf-version': '1.2.5',
        },
    };

    try {
        await this.request<string>(options);
    } catch (error) {
        throw new Error(error.message);
    }
}

/**
 * Send a signed GET request to the item-list endpoint of the current scrape type.
 *
 * Also records `storeValue` ('trend', or the id / challengeID / musicID of the
 * query), which names the per-scrape history file.
 *
 * @param qs request query
 * @returns the parsed JSON response body
 * @throws {Error} with the underlying message when the request fails
 */
private async scrapeData<T>(qs: RequestQuery): Promise<T> {
    this.storeValue = this.scrapeType === 'trend' ? 'trend' : qs.id || qs.challengeID! || qs.musicID!;

    // The signature is computed over the full url, query string included.
    const endpoint = this.getApiEndpoint;
    const unsignedURL = `${endpoint}?${new URLSearchParams(qs as any).toString()}`;
    const _signature = sign(unsignedURL, this.headers['user-agent']);

    const options = {
        uri: endpoint,
        method: 'GET',
        qs: {
            ...qs,
            _signature,
        },
        json: true,
    };

    try {
        const response = await this.request<T>(options);
        return response;
    } catch (error) {
        throw new Error(error.message);
    }
}

/**
 * Get trending feed query.
 *
 * @returns the fixed query used for the trend ('fyp') feed
 */
// eslint-disable-next-line class-methods-use-this
private async getTrendingFeedQuery(): Promise<RequestQuery> {
    return {
        aid: 1988,
        app_name: 'tiktok_web',
        device_platform: 'web_pc',
        lang: '',
        from_page: 'fyp',
        itemID: 1,
    };
}

/**
 * Get music feed query.
 *
 * When `input` is a music url, it is replaced by the numeric id found in it.
 *
 * @returns query for the music item-list endpoint
 */
private async getMusicFeedQuery(): Promise<RequestQuery> {
    const musicIdRegex = /.com\/music\/[\w+-]+-(\d{15,22})/.exec(this.input);
    if (musicIdRegex) {
        this.input = musicIdRegex[1] as string;
    }
    return {
        musicID: this.input,
        lang: '',
        aid: 1988,
        // mainLoop derives the cursor as (page - 1) * count; without a count the
        // cursor becomes NaN from the second page on.
        // NOTE(review): 30 is an assumed page size — confirm against the API.
        count: 30,
        cursor: 0,
        verifyFp: '',
    };
}

/**
 * Get hashtag ID.
 *
 * Resolves the challenge id for `input` through the share endpoint on the
 * first call and caches it in `idStore` for the following pages.
 *
 * @returns query for the hashtag item-list endpoint
 * @throws {Error} when the hashtag cannot be found or the request fails
 */
private async getHashTagId(): Promise<RequestQuery> {
    const buildQuery = (): RequestQuery => ({
        challengeID: this.idStore,
        // mainLoop derives the cursor as (page - 1) * count; without a count the
        // cursor becomes NaN from the second page on.
        // NOTE(review): 30 is an assumed page size — confirm against the API.
        count: 30,
        cursor: 0,
        aid: 1988,
        verifyFp: this.verifyFp,
    });

    if (this.idStore) {
        return buildQuery();
    }

    const id = encodeURIComponent(this.input);
    const query = {
        uri: `${this.mainHost}node/share/tag/${id}?uniqueId=${id}`,
        qs: {
            user_agent: this.headers['user-agent'],
        },
        method: 'GET',
        json: true,
    };

    try {
        const response = await this.request<TikTokMetadata>(query);
        if (response.statusCode !== 0) {
            throw new Error(`Can not find the hashtag: ${this.input}`);
        }
        this.idStore = response.challengeInfo.challenge.id;
        return buildQuery();
    } catch (error) {
        throw new Error(error.message);
    }
}

/**
 * Get user ID.
 *
 * When scraping by user id, or once the ids are cached, the query is built
 * from the stored values; otherwise the profile page is fetched first to
 * resolve `secUid` and `id`.
 *
 * @returns query for the user item-list endpoint
 * @throws {Error} with the underlying message when the profile lookup fails
 */
private async getUserId(): Promise<RequestQuery> {
    // Key order is kept exactly as before in both branches: the query is
    // serialised into the url that gets signed.
    if (this.byUserId || this.idStore) {
        return {
            id: this.userIdStore,
            secUid: this.idStore ? this.idStore : this.input,
            lang: '',
            aid: 1988,
            cursor: 0,
            app_name: 'tiktok_web',
            device_platform: 'web_pc',
            cookie_enabled: true,
            history_len: 2,
            focus_state: true,
            is_fullscreen: false,
        };
    }

    try {
        const response = await this.getUserProfileInfo();
        this.idStore = response.user.secUid;
        this.userIdStore = response.user.id;
        return {
            id: this.userIdStore,
            aid: 1988,
            secUid: this.idStore,
            lang: '',
            cursor: 0,
            app_name: 'tiktok_web',
            device_platform: 'web_pc',
            cookie_enabled: true,
            history_len: 2,
            focus_state: true,
            is_fullscreen: false,
        };
    } catch (error) {
        throw new Error(error.message);
    }
}

/**
 * Get user profile information.
 *
 * Fetches the profile page of `input` and reads the user info from the
 * embedded `__NEXT_DATA__` JSON script.
 *
 * @returns the `userInfo` object of the page props
 * @throws {Error} 'Username is missing' when no input is set,
 *         'User does not exist' on a 404, and a generic extraction error for
 *         every other failure (including a page without the expected script)
 */
public async getUserProfileInfo(): Promise<UserMetadata> {
    if (!this.input) {
        throw new Error(`Username is missing`);
    }
    const options = {
        method: 'GET',
        uri: `https://www.tiktok.com/@${encodeURIComponent(this.input)}`,
        json: true,
    };
    try {
        const response = await this.request<string>(options);
        const breakResponse = response
            .split(/<script id="__NEXT_DATA__" type="application\/json" nonce="[\w-]+" crossorigin="anonymous">/)[1]
            .split(`</script>`)[0];
        if (breakResponse) {
            const userMetadata: WebHtmlUserMetadata = JSON.parse(breakResponse);
            return userMetadata.props.pageProps.userInfo;
        }
    } catch (err) {
        if (err.statusCode === 404) {
            throw new Error('User does not exist');
        }
    }
    // Reached on any non-404 failure and when the script was empty, so the
    // method never resolves with undefined.
    throw new Error(`Can't extract user metadata from the html page. Make sure that user does exist and try to use proxy`);
}

/**
 * Get hashtag information.
 *
 * @returns the `challengeInfo` object of the share endpoint response
 * @throws {Error} 'Hashtag is missing' when no input is set, "Can't find
 *         hashtag" when the response is empty or has a non-zero status, or
 *         the underlying message when the request fails
 */
public async getHashtagInfo(): Promise<HashtagMetadata> {
    if (!this.input) {
        throw new Error(`Hashtag is missing`);
    }
    // Encode the tag the same way getHashTagId does, so tags with special
    // characters produce a valid url.
    const tag = encodeURIComponent(this.input);
    const query = {
        uri: `${this.mainHost}node/share/tag/${tag}?uniqueId=${tag}`,
        qs: {
            appId: 1233,
        },
        method: 'GET',
        json: true,
    };

    try {
        const response = await this.request<TikTokMetadata>(query);
        if (!response || response.statusCode !== 0) {
            throw new Error(`Can't find hashtag: ${this.input}`);
        }
        return response.challengeInfo;
    } catch (error) {
        throw new Error(error.message);
    }
}

/**
 * Get music information.
 *
 * `input` is expected to contain `music/<title>-<id>`; title and id are
 * extracted from it (empty strings are used when they cannot be found).
 *
 * @returns the `musicInfo` object of the share endpoint response
 * @throws {Error} 'Music is missing' when no input is set, "Can't find music
 *         data" on a non-zero status, or the underlying message when the
 *         request fails
 */
public async getMusicInfo(): Promise<MusicMetadata> {
    if (!this.input) {
        throw new Error(`Music is missing`);
    }

    const musicTitle = /music\/([\w-]+)-\d+/.exec(this.input);
    const musicId = /music\/[\w-]+-(\d+)/.exec(this.input);
    const title = musicTitle ? musicTitle[1] : '';
    const id = musicId ? musicId[1] : '';

    const uri = `https://www.tiktok.com/node/share/music/${title}-${id}`;
    const qs = {
        screen_width: 1792,
        screen_height: 1120,
        lang: 'en',
        priority_region: '',
        referer: '',
        root_referer: '',
        app_language: 'en',
        is_page_visible: true,
        history_len: 6,
        focus_state: true,
        is_fullscreen: false,
        aid: 1988,
        app_name: 'tiktok_web',
        timezone_name: '',
        device_platform: 'web',
        musicId: id,
        musicName: title,
    };

    // The signature is computed over the unsigned url and then sent as the last query parameter.
    const unsignedURL = `${uri}?${new URLSearchParams(qs as any).toString()}`;
    const _signature = sign(unsignedURL, this.headers['user-agent']);

    const query = {
        uri,
        qs: { ...qs, _signature },
        method: 'GET',
        json: true,
    };

    try {
        const response = await this.request<TikTokMetadata>(query);
        if (response.statusCode !== 0) {
            throw new Error(`Can't find music data: ${this.input}`);
        }
        return response.musicInfo;
    } catch (error) {
        throw new Error(error.message);
    }
}

/**
 * Sign URL.
 *
 * @returns the signature of `input`, computed with the configured user agent
 * @throws {Error} 'Url is missing' when no input is set
 */
public async signUrl() {
    if (!this.input) {
        throw new Error(`Url is missing`);
    }
    return sign(this.input, this.headers['user-agent']);
}

/**
 * Get video metadata from the HTML.
 * This method can be used if you aren't able to retrieve video metadata from a simple API call.
 * Can be slow.
 *
 * Two page layouts are understood: a `__NEXT_DATA__` JSON script and a
 * `SIGI_STATE` JSON script.
 *
 * @returns the item structure of the video
 * @throws {Error} "Can't extract video metadata: <input>" on any failure
 */
private async getVideoMetadataFromHtml(): Promise<FeedItems> {
    const options = {
        uri: this.input,
        method: 'GET',
        json: true,
    };
    try {
        const response = await this.request<string>(options);
        if (!response) {
            throw new Error(`Can't extract video meta data`);
        }

        if (response.includes('__NEXT_DATA__')) {
            const rawVideoMetadata = response
                .split(/<script id="__NEXT_DATA__" type="application\/json" nonce="[\w-]+" crossorigin="anonymous">/)[1]
                .split(`</script>`)[0];
            const videoProps = JSON.parse(rawVideoMetadata);
            return videoProps.props.pageProps.itemInfo.itemStruct as FeedItems;
        }

        if (response.includes('SIGI_STATE')) {
            const rawVideoMetadata = response.split('<script id="SIGI_STATE" type="application/json">')[1].split('</script>')[0];
            const videoProps = JSON.parse(rawVideoMetadata);
            // The first entry of ItemModule is taken as the video.
            return Object.values(videoProps.ItemModule)[0] as FeedItems;
        }

        throw new Error('No available parser for html page');
    } catch (error) {
        // Every failure is reported with the same message.
        throw new Error(`Can't extract video metadata: ${this.input}`);
    }
}

/**
 * Get video metadata from the regular API endpoint.
 *
 * @param url video url; falls back to `input` when empty
 * @returns the item structure of the video
 * @throws {Error} 'Video does not exist' on a 404; "Can't extract video
 *         metadata: <input>" when the url is not a video url, the API
 *         reports a non-zero status, or the request fails
 */
private async getVideoMetadata(url = ''): Promise<FeedItems> {
    const videoData = /tiktok.com\/(@[\w.-]+)\/video\/(\d+)/.exec(url || this.input);
    if (videoData) {
        const videoUsername = videoData[1];
        const videoId = videoData[2];

        const options = {
            method: 'GET',
            uri: `https://www.tiktok.com/node/share/video/${videoUsername}/${videoId}`,
            json: true,
        };

        try {
            const response = await this.request<VideoMetadata>(options);
            if (response.statusCode === 0) {
                return response.itemInfo.itemStruct;
            }
        } catch (err) {
            if (err.statusCode === 404) {
                throw new Error('Video does not exist');
            }
        }
    }
    // Single fallback for every unsuccessful path, so the method never resolves with undefined.
    throw new Error(`Can't extract video metadata: ${this.input}`);
}

/**
 * Get video metadata, optionally with the url without the watermark.
 *
 * The resulting item is also pushed onto the collector.
 *
 * NOTE(review): the closing braces of the nested object literals were missing
 * in the text this was restored from; the nesting below (authorMeta, musicMeta,
 * videoMeta, covers; counters at the top level) is reconstructed — confirm
 * against the PostCollector type.
 *
 * @param html read the metadata from the video page (default) instead of the API endpoint
 * @returns the collected video item
 * @throws {Error} 'Url is missing' when no input is set, or the error of the metadata lookup
 */
public async getVideoMeta(html = true): Promise<PostCollector> {
    if (!this.input) {
        throw new Error(`Url is missing`);
    }

    const videoData: FeedItems = html ? await this.getVideoMetadataFromHtml() : await this.getVideoMetadata();

    const videoItem = {
        id: videoData.id,
        secretID: videoData.video.id,
        text: videoData.desc,
        createTime: videoData.createTime,
        authorMeta: {
            id: videoData.author.id,
            secUid: videoData.author.secUid,
            nickName: videoData.author.nickname,
            following: videoData.authorStats.followingCount,
            fans: videoData.authorStats.followerCount,
            heart: videoData.authorStats.heartCount,
            video: videoData.authorStats.videoCount,
            digg: videoData.authorStats.diggCount,
            verified: videoData.author.verified,
            private: videoData.author.secret,
            signature: videoData.author.signature,
            avatar: videoData.author.avatarLarger,
        },
        musicMeta: {
            musicId: videoData.music.id,
            musicName: videoData.music.title,
            musicAuthor: videoData.music.authorName,
            musicOriginal: videoData.music.original,
            coverThumb: videoData.music.coverThumb,
            coverMedium: videoData.music.coverMedium,
            coverLarge: videoData.music.coverLarge,
            duration: videoData.music.duration,
        },
        imageUrl: videoData.video.cover,
        videoUrl: videoData.video.playAddr,
        videoUrlNoWaterMark: '',
        videoApiUrlNoWaterMark: '',
        videoMeta: {
            width: videoData.video.width,
            height: videoData.video.height,
            ratio: videoData.video.ratio,
            duration: videoData.video.duration,
            duetEnabled: videoData.duetEnabled,
            stitchEnabled: videoData.stitchEnabled,
            duetInfo: videoData.duetInfo,
        },
        covers: {
            default: videoData.video.cover,
            origin: videoData.video.originCover,
        },
        diggCount: videoData.stats.diggCount,
        shareCount: videoData.stats.shareCount,
        playCount: videoData.stats.playCount,
        commentCount: videoData.stats.commentCount,
        downloaded: false,
        mentions: videoData.desc.match(/(@\w+)/g) || [],
        // NOTE(review): `title` is filled from the challenge `desc`, as in the
        // original code; the challenge's own `title` is not used — confirm intent.
        hashtags: videoData.challenges
            ? videoData.challenges.map(({ id, desc, profileLarger }) => ({
                  id,
                  title: desc,
                  cover: profileLarger,
              }))
            : [],
        effectStickers: videoData.effectStickers
            ? videoData.effectStickers.map(({ ID, name }) => ({
                  id: ID,
                  name,
              }))
            : [],
    } as PostCollector;

    try {
        if (this.noWaterMark) {
            videoItem.videoApiUrlNoWaterMark = await this.extractVideoId(videoItem);
            videoItem.videoUrlNoWaterMark = await this.getUrlWithoutTheWatermark(videoItem.videoApiUrlNoWaterMark);
        }
    } catch {
        // continue regardless of error
    }

    this.collector.push(videoItem);
    return videoItem;
}

/**
 * If webhook url was provided then send POST/GET request to the URL with the data from the this.collector.
 *
 * Each collected item is sent separately: as the JSON body for POST, or
 * url-encoded in the `json` query parameter for GET. Successes and failures
 * are counted in `httpRequests`; failures never abort the delivery.
 */
private sendDataToWebHookUrl() {
    return new Promise(resolve => {
        forEachLimit(
            this.collector,
            // NOTE(review): forEachLimit requires a concurrency limit as its second
            // argument; the value was absent from this copy of the code — confirm 3.
            3,
            (item, cb) => {
                rp({
                    uri: this.webHookUrl,
                    method: this.method,
                    headers: {
                        'user-agent': 'TikTok-Scraper',
                    },
                    ...(this.method === 'POST' ? { body: item } : {}),
                    ...(this.method === 'GET' ? { qs: { json: encodeURIComponent(JSON.stringify(item)) } } : {}),
                    json: true,
                })
                    .then(() => {
                        this.httpRequests.good += 1;
                    })
                    .catch(() => {
                        this.httpRequests.bad += 1;
                    })
                    .finally(() => cb(null));
            },
            () => {
                resolve(null);
            },
        );
    });
}
// End of class TikTokScraper: the single brace on the last original line stood for both closers.
}

[TIKTOK CODE]

> [Twitter_Activity.data]

RANK

TREND

squidgame

squidgameedit

squidgame2

valentinesday

thanos

squidgames2

2021

игравкальмара

kdrama

player222

POSTS

3,234,649

211,923

77,195

12,629,198

882,176

103,819

21,898,120

235,771

10,122,918

77,194

> [Processing.]

> [TikTok_Activity.data]

RANK

TREND

squidgame

squidgameedit

squidgame2

valentinesday

thanos

squidgames2

2021

игравкальмара

kdrama

player222

POSTS

3,234,649

211,923

77,195

12,629,198

882,176

103,819

21,898,120

> [Processing.]

=====================================================

[ROADMAP]

[TOKENOMICS]

[GITHUB]

[TWITTER]

=====================================================

[ROADMAP]

[TOKENOMICS]

[GITHUB]

[TWITTER]

=====================================================

[ROADMAP]

[TOKENOMICS]

[GITHUB]

[TWITTER]

(//*

//(((///(//((#//*//(/*//*/*,/(

. (((((((%##%/(((##%/((*///((/(#(((((//(#/(# . .. .

/##(#%%##%/#(((#%#((#%%%#((/(###(###(/((#(/(%%(,

. /%#%%%#%%#%##(%%%((#%%%%%#(%(#/####(#(####/(((((%%%%, .

.(%%%%%%##%###%%%#((#%%%%%(%##%/#(##%%#(#%%###(#(#/(%%##(

. ,%&%&&#%&%%#%%#%#%%%%%%%%%%&%%##(#/(###%###%%%%#######%###

(* .&&&&#&&&##%&%%%%%&%%%%%##%%%%%%#%(((###%%%%%%%%%%%%##(##%%.,,.,.

*. //&&%&&%#&%&&&&&&&&%%%&%%%&%&&&%&%%##((((##(##%#&#%%%%%(####*(/

*#(/#&&%%#%&&&&&&&%%%%%%%%%%%&&&&&&&&&&&&&%%%%####%#%%%%%%%(##%%/#(*.

%#((/&%%%&&&&&&&&&&&&&&&&&&&&&&&&&@@@@@@@@@@@@@@@@@@@@@&@@@#&(#((##

#/(., &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&%%%%%&&&&&&&@@@@@@@&&%(##, *

&&&&&&&&&&&&&&&@.,.,,,.,,.,.,,,,,,,,,,,@@@@@@@&*,*,&&&,(%### /(

. .&&&&&&&&&&&&&&@,,......#@@@@@@@@,,,,,,*****/////**,,&&&&%#%/ ,

,,,*/&&&&&&&&&&&&&&&,,...,,,.,,,,,,,,,,**,**/*@@@@@@@@/,,&&&&&&##&&#%

***(&&&&&&&&&,,&&&@*.,%@@@@@@@@@@@,,,,,,,*****@&@@@@@@ *,@&&&&***,**,

/(*/&&&&&&&,,,,,,,,,%@%%&&@@@@@@&&@,,,,,,,*,**@&&@@@@ /*&@@&&&**,,*,*

/%/#&&&&&&&,.,,,,,,,,,, (&&@@@@@@@&,,,,,,,,*/**@@@@@%/**@@@&&&,//*,**

(//%&&&&&&&..,,,,,,,,,,,, ,@&&@@@@(,,,,,,.,*/////////**@@@@&&&&(/*/**

/#/&&&&&&&&&&,,,,,,,,,,,,,,,,,,,,,,,,,,,,,**////////**@@@@@&&&&//((((

#(&&&&&&&&@@@@@@@@@.,,,,,,,,,,,,,,,,,.,,,,,*///////,@@@@@@@@@&&/**/**

(&&&&&&&&&@@@@@@@@@@@@&,,,,,,,,,,,,,,,,,,*//////,#@@@@@@@@@@@&&/.((((

(&&&&&&&&&@@@@@@@@@@@@@@@@@@.,,,,,,,,,,,,,///,@@@@@@@@@@@@@@@@&&#(##(

,&&&&&&&@@@@@@@@@@@@@@@@@@@@@@@@/*//////@@@@@@@@@@@@@@@@@@@@@&&&(%###

%&&&&&&&&@@@@@@@@@@@@@@@@@@@%%,.. (@&..,%@@@@@@@@@@@@@@@@@@@@&&//(&#

#&&&&&&&@@@@@@@@@@@@@@@@@%#%#%%*, .@@@*.,%%%@@@@@@@@@@@@@@@@@@&&&(#%&

%&&&&&&&@@@@@@@@@@@@@&###%%%%%%%...,@&.,/%%%%#%@@@(%%%%#%%%%%&&&&/#(#

%&&&&&&&@@@@@@@@@@@@%#%%%%%%%%%%....@@ %%%%%%#/%%##%%%%%%%%%%/&&%%#

&&&&&&&&@@@@@@@@@@@%%%%%%%%%%%%%....@@@ %%%#%%((#%%%%%%%%%%%%%#(&///

(&&&&&@@@@@@@@@@@@%%%%%%%%%%%%%%.,..@@@..*%#%%((((#%%%%%%%%%%%%%%&##/

&&&&&&&@@@@@@@@@@@###%%%%%%%%%%%(...@@@ ..%%%%%((((#%%%%%%%%%%%%%&(##

&&&&&&@@@@@@@@@@@@%#%%%%%%%%%%%%%,..@@@@..&%%%%%((((%%%%%%%%%%%%&&&%%

&&&&&&@@@@@@@@@@@%%#%##%%%%%%%%%&,..@@@@.,,%%#%%(##((%%%%%%%%(@@&&&&&

&&&&&&&@@@@@@@@@@%%#%#%%%%%%#%%%%,, @@@@(,%%%%/(%###%#&@@@@@@@@&&&&&%

&&&&&&&@@@@@@@@@@%%%##%%%%%%%%%%%.,.@@#&@%&&%%#%%##%%%#@@@&@@@@@&&&&%

&&&&&&&&@@@@@@@@%%%%%%%%%%%%%#%%%./**,*,,%,(%#%%%%#%%##%@@@&@@@@&&&&%

&&&&&&&&@@@@@@@@%%%%%#%%%%%%%##%/**,,,,,,*,,%%##%%%#%%##@@@@@@@&&&&&&