# Merge per-domain (first_seen, last_seen) sightings from `domains_nameservers`
# into longer periods: consecutive sightings of a domain are kept in the same
# period unless the gap between them is large enough to raise the `match` flag.
#
# Results left at module level: a, windowSpec, b, c, w, newDF, nom.
import sys

import pyspark.sql.functions as func
from pyspark.sql import functions as F  # the expressions below use the `F` alias
from pyspark.sql.window import Window

# Placeholder dates substituted for missing first_seen / last_seen values.
FIRST_SEEN_PLACEHOLDER = "1970-01-01"
LAST_SEEN_PLACEHOLDER = "2022-12-31"
# Length, in days, of the unit the gap between sightings is measured in.
GAP_UNIT_DAYS = 91

# One row per distinct (domain_id, last_seen, first_seen) with a row counter.
# NOTE(review): fillna with a string value only fills string-typed columns;
# this assumes first_seen / last_seen are stored as strings -- confirm.
a = (
    domains_nameservers.fillna(FIRST_SEEN_PLACEHOLDER, ["first_seen"])
    .fillna(LAST_SEEN_PLACEHOLDER, ["last_seen"])
    .groupBy("domain_id", "last_seen", "first_seen")
    .agg(F.countDistinct("*").alias("counter"))
    .orderBy(F.asc("counter"))
)

# Sightings of one domain, ordered by last_seen and then first_seen.
windowSpec = Window.partitionBy(F.col("domain_id")).orderBy(
    F.col("last_seen"), F.col("first_seen")
)

# Days between this sighting's first_seen and the previous sighting's
# last_seen; null on the first row of each domain (there is no previous row).
gap_days = F.datediff(
    F.col("first_seen"), F.lag(F.col("last_seen"), 1).over(windowSpec)
)

b = a.withColumn("grp", gap_days)

# "1" when floor(gap / 91) > 1, i.e. the gap is at least 182 days; "0"
# otherwise, including the null gap on the first row of each domain.
c = b.withColumn(
    "match",
    F.when(F.floor(gap_days / GAP_UNIT_DAYS) > 1, "1").otherwise("0"),
)

# Same partitioning and ordering as windowSpec, framed from the first row of
# the domain up to the current row, so the sum below is a running total.
w = windowSpec.rowsBetween(Window.unboundedPreceding, Window.currentRow)

# Running count of flagged gaps; rows sharing a val_sum form one period.
# NOTE(review): `match` holds the strings "1"/"0", so this sum relies on
# Spark's implicit string-to-number cast -- confirm that is intended.
newDF = c.withColumn("val_sum", F.sum(F.col("match")).over(w))

# Collapse each period to its earliest first_seen and latest last_seen, then
# drop periods whose bounds are still the placeholder dates.
nom = (
    newDF.groupBy("domain_id", "val_sum")
    .agg(
        F.min("first_seen").alias("first_seen"),
        F.max("last_seen").alias("last_seen"),
    )
    .where(
        f"first_seen!='{FIRST_SEEN_PLACEHOLDER}' "
        f"and last_seen!='{LAST_SEEN_PLACEHOLDER}'"
    )
)
# Replies to Untitled
# | Title        | Name    | Language | UNIX       | When        |
# | Re: Untitled | Antonia | python   | 1653238240 | 1 Year ago. |
{"html5":"htmlmixed","css":"css","javascript":"javascript","php":"php","python":"python","ruby":"ruby","lua":"text\/x-lua","bash":"text\/x-sh","go":"go","c":"text\/x-csrc","cpp":"text\/x-c++src","diff":"diff","latex":"stex","sql":"sql","xml":"xml","apl":"apl","asterisk":"asterisk","c_loadrunner":"text\/x-csrc","c_mac":"text\/x-csrc","coffeescript":"text\/x-coffeescript","csharp":"text\/x-csharp","d":"d","ecmascript":"javascript","erlang":"erlang","groovy":"text\/x-groovy","haskell":"text\/x-haskell","haxe":"text\/x-haxe","html4strict":"htmlmixed","java":"text\/x-java","java5":"text\/x-java","jquery":"javascript","mirc":"mirc","mysql":"sql","ocaml":"text\/x-ocaml","pascal":"text\/x-pascal","perl":"perl","perl6":"perl","plsql":"sql","properties":"text\/x-properties","q":"text\/x-q","scala":"scala","scheme":"text\/x-scheme","tcl":"text\/x-tcl","vb":"text\/x-vb","verilog":"text\/x-verilog","yaml":"text\/x-yaml","z80":"text\/x-z80"}