Commit 42119f6a authored by Alexandra Rogova

index received from db is bad

parent 3ff3b171
jsSHA @ 766f8ff7
Subproject commit 766f8ff7d926347b008a252a41b06565df747ac5
rusha @ 7dc20211
Subproject commit 7dc2021195219e54b2696af524141696f35694dd
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
<script src="../../external/renderjs/dist/renderjs-latest.js"></script> <script src="../../external/renderjs/dist/renderjs-latest.js"></script>
<script src="../../external/flexsearch/flexsearch.js"></script> <script src="../../external/flexsearch/flexsearch.js"></script>
<script src="../../external/msgpack-lite/dist/msgpack.min.js"></script> <script src="../../external/msgpack-lite/dist/msgpack.min.js"></script>
<script src="../../external/jsSHA/src/sha1.js"></script>
<script src="../js/gadget_model.js"></script> <script src="../js/gadget_model.js"></script>
</head> </head>
<body> <body>
......
...@@ -18,31 +18,30 @@ ...@@ -18,31 +18,30 @@
.setState({ .setState({
to_load: [ to_load: [
"44_svt.xml", //135 urls //"44_svt.xml", //135 urls
// "allemandfacile.xml", //650 urls //"allemandfacile.xml", //650 urls
// "anglaisfacile.xml", //567 urls //"anglaisfacile.xml", //567 urls
// "bescherelle.xml", //60 urls "codeacademy.xml", //28 urls
// "codeacademy.xml", //28 urls //"francaisfacile.xml", //1119 urls
// "francaisfacile.xml", //1119 urls
//"hgeo_college.xml", //227 urls //"hgeo_college.xml", //227 urls
//"histoirencours.xml", //1415 urls // "histoirencours.xml", //1415 urls
//"italienfacile.xml", //1477 urls // "italienfacile.xml", //1478 urls
//"jerevise.xml", //918 urls // "jerevise.xml", //919 urls
// "junior_science_et_vie.xml", //532 urls // "junior_science_et_vie.xml", //532 urls
// "kmusic.xml", //107 urls //"kmusic.xml", //107 urls
// "larousse.xml", //4563 urls // "larousse.xml", //4563 urls
// //"letudiant.xml", //41649 urls // //"letudiant.xml", //41649 urls
// "lewebpedagogique.xml", //298 urls // "lewebpedagogique.xml", //298 urls
// //"livrespourtous.xml", //12061 urls // //"livrespourtous.xml", //12061 urls
// "mathovore.xml", //2221 urls // "mathovore.xml", //2222 urls
// "monanneeaucollege.xml", //121 urls //"monanneeaucollege.xml", //121 urls
// "nosdevoirs.xml", //462 urls // "nosdevoirs.xml", //462 urls
// "physagreg.xml", //150 urls //"physagreg.xml", //150 urls
// "physique_chimie_college.xml", //282 urls //"physique_chimie_college.xml", //282 urls
// "reviser_brevet.xml", //229 urls //"reviser_brevet.xml", //229 urls
// "soutien67.xml", //1604 urls // "soutien67.xml", //1604 urls
// //"superprof.xml", //12296 urls // //"superprof.xml", //12296 urls
// "technologieaucollege27.xml", //129 urls //"technologieaucollege27.xml", //129 urls
// "espagnolfacile.xml", //3352 urls // "espagnolfacile.xml", //3352 urls
// "vivelessvt.xml", //1257 urls // "vivelessvt.xml", //1257 urls
// // TEST SITEMAPS TO FILL INDEX // // TEST SITEMAPS TO FILL INDEX
...@@ -67,17 +66,17 @@ ...@@ -67,17 +66,17 @@
// "vox.xml", //1194 urls // "vox.xml", //1194 urls
// "cbsnews.xml", //1260 urls // "cbsnews.xml", //1260 urls
// "mirror.xml", //3528 urls // "mirror.xml", //3528 urls
// "abcnews.xml", //1077 urls // "abcnews.xml", //971 urls
// "lequipe.xml", //3455 urls // "lequipe.xml", //3455 urls
// "rugbyrama.xml", //1817 urls //"rugbyrama.xml", //1817 urls
// "elle.xml", //3532 urls // "elle.xml", //3532 urls
// "figaro.xml", //2965 urls // "figaro.xml", //2965 urls
// "lepoint.xml", //3747 urls // "lepoint.xml", //3747 urls
// "telerama.xml", //2593 urls // "telerama.xml", //2593 urls
// "liberation.xml", //819 urls // "liberation.xml", //819 urls
// "lemonde.xml", //3517 urls // "lemonde.xml", //3517 urls
// "leparisien.xml", //2189 urls //"leparisien.xml", //2189 urls
// "latribune.xml" //3190 urls //"latribune.xml" //3190 urls
] ]
}) })
...@@ -118,16 +117,6 @@ ...@@ -118,16 +117,6 @@
return RSVP.all(promise_list); return RSVP.all(promise_list);
} }
}); });
/* return gadget.is_db_empty()
.push(function(empty){
if (empty) {
for (var i=0; i<gadget.state.to_load.length; i+=1){
promise_list.push(gadget.load_file("../../../crawler_test/" + gadget.state.to_load[i]));
}
return RSVP.all(promise_list);
}
});*/
}) })
.declareMethod("load_file", function(file_path, file_name){ //OK .declareMethod("load_file", function(file_path, file_name){ //OK
......
...@@ -2,24 +2,19 @@ ...@@ -2,24 +2,19 @@
/*global window, RSVP, rJS, jIO*/ /*global window, RSVP, rJS, jIO*/
(function (window, document, RSVP, rJS, jIO) { (function (window, document, RSVP, rJS, jIO) {
"use strict"; "use strict";
rJS(window) rJS(window)
.ready(function(){ .ready(function(){
var index, db; var index, db;
var index = FlexSearch.create("memory"); index = FlexSearch.create("memory");
db = jIO.createJIO( db = jIO.createJIO(
{ {
type : "query",
sub_storage : {
type : "uuid",
sub_storage : {
type : "indexeddb", type : "indexeddb",
database : "mynij" database : "mynij"
} }
}
}
); );
this.changeState({ this.changeState({
...@@ -45,24 +40,24 @@ ...@@ -45,24 +40,24 @@
}) })
.declareMethod("add_page", function(page_info){ //page_info = {link, title, description, item} .declareMethod("add_page", function(page_info){ //page_info = {link, title, description, item}
console.log(this.state.index.info());
var gadget = this, var gadget = this,
tmp; tmp;
tmp = page_info; tmp = page_info;
tmp.portal_type = "page"; tmp.portal_type = "page";
return gadget.state.db.put(page_info.link, tmp) return gadget.state.db.put(page_info.link, tmp)
.push(function(){ .push(function(){
var defer = RSVP.defer(); /*var defer = RSVP.defer();
gadget.state.index.add(page_info.link, page_info.title + " " + page_info.item, defer.resolve.bind(defer)); gadget.state.index.add(page_info.link, page_info.title + " " + page_info.item, defer.resolve.bind(defer));
return defer.promise; return defer.promise;*/
}) return gadget.state.index.add(page_info.link, page_info.title + " " + page_info.item);
.push(function(){
return gadget._save_index();
}); });
}) })
.declareMethod("loaded", function(file_name){ .declareMethod("loaded", function(file_name){
var gadget = this, var gadget = this,
id; id;
console.log(file_name + " done loading");
return gadget.state.db.get("loaded") return gadget.state.db.get("loaded")
.push(function(result){ .push(function(result){
...@@ -71,29 +66,183 @@ ...@@ -71,29 +66,183 @@
return gadget.state.db.put("loaded", tmp); return gadget.state.db.put("loaded", tmp);
}) })
.push(undefined, function (my_error) { .push(undefined, function (my_error) {
console.log(my_error);
var tmp = {}; var tmp = {};
tmp[file_name] = true; tmp[file_name] = true;
return gadget.state.db.put("loaded", tmp); return gadget.state.db.put("loaded", tmp);
})
.always(function(){
return gadget._save_index();
}); });
}) })
.declareMethod("_save_index", function(){ //OK .declareMethod("_save_index", function(){
/* var gadget = this;
console.log("getting index");
var serialized = this.state.index.export_test(); var serialized = this.state.index.export_test();
return this.state.db.put("index", serialized); console.log("Index received. Saving");
return this.state.db.put("index", this.state.index.info())
.push(function(){
console.log(1);
//return gadget.state.db.putAttachment("index", "ids", new Blob([serialized.ids], {type : "application/json"}));
return gadget.state.db.putAttachment("index", "ids", new Blob([serialized.ids], {type : "text/plain"}));
})
.push(function(){
console.log(2);
//return gadget.state.db.putAttachment("index", "map", new Blob([serialized.map], {type : "application/json"}));
var shaObj = new jsSHA("SHA-1", "TEXT");
shaObj.update(serialized.map);
console.log("map sha1 : " + shaObj.getHash("HEX"));
return gadget.state.db.putAttachment("index", "map", new Blob([serialized.map], {type : "text/plain"}));
})
.push(function(){
console.log(3);
//return gadget.state.db.putAttachment("index", "ctx", new Blob([serialized.ctx], {type : "application/json"}));
return gadget.state.db.putAttachment("index", "ctx", new Blob([serialized.ctx], {type : "text/plain"}));
})
.push(function(){
console.log("index saved");
})
.push(undefined, function (my_error) {
throw my_error;
//console.log(my_error);
});*/
var gadget = this,
serialized,
i,
j = 0,
promise_list = [];
console.log("getting index");
serialized = this.state.index.export_test();
console.log("Index received. Saving");
return this.state.db.put("index_map", {})
.push(function(){
console.log(serialized.map.slice(0, 1000));
for (i = 0; i < serialized.map.length; i += 10000){
promise_list.push(gadget.state.db.putAttachment("index_map", j+"", new Blob([serialized.map.slice(i, i+10000)], {type : "application/json"})));
j+=1;
}
j = 0;
return RSVP.all(promise_list);
})
.push(function(){
return gadget.state.db.put("index_ids", {});
})
.push(function(){
promise_list = [];
for (i = 0; i < serialized.map.length; i += 10000){
promise_list.push(gadget.state.db.putAttachment("index_ids", j+"", new Blob([serialized.ids.slice(i, i+10000)], {type : "application/json"})));
j+=1;
}
j = 0;
return RSVP.all(promise_list);
})
.push(function(){
return gadget.state.db.put("index_ctx", {});
})
.push(function(){
promise_list = [];
for (i = 0; i < serialized.ctx.length; i += 10000){
promise_list.push(gadget.state.db.putAttachment("index_ctx", j+"", new Blob([serialized.map.slice(i, i+10000)], {type : "application/json"})));
j+=1;
}
return RSVP.all(promise_list);
})
.push(function(){
console.log("index saved");
})
.push(undefined, function (my_error) {
console.log(my_error);
});
}) })
.declareMethod("search", function(query){ .declareMethod("search", function(query){
return this.state.index.search(query); return this.state.index.search(query);
}) })
.declareMethod("_load_index", function(msgpack){ //OK .declareMethod("_load_index", function(msgpack){
var gadget = this; /* var gadget = this,
return gadget.state.db.get("index") ids,
.push(function(index){ map;
gadget.state.index.import_test(index.ids, index.map, index.ctx); return gadget.state.db.getAttachment("index", "ids", {"format": "text"})
.push(function(result){
ids = result;
return gadget.state.db.getAttachment("index", "map", {"format": "text"});
})
.push(function(result){
map = result;
var shaObj = new jsSHA("SHA-1", "TEXT");
shaObj.update(map);
console.log("map sha1 : " + shaObj.getHash("HEX"));
return gadget.state.db.getAttachment("index", "ctx", {"format": "text"});
})
.push(function(result){
gadget.state.index.import_test(ids, map, result);
console.log("index imported from memory");
})
.push(undefined, function (my_error) {console.log(my_error)});*/
var gadget = this,
ids,
map,
ctx,
promise_list = [],
i;
return gadget.state.db.allAttachments("index_ids")
.push(function(result){
if (Object.keys(result).length !== 0){
for (i = 0; i < Object.keys(result).length; i+=1){
promise_list.push(gadget.state.db.getAttachment("index_ids", i+"", {format : "text"}));
}
return RSVP.all(promise_list);
} else {
return null;
}
})
.push(function(result){
if (result === null) ids = "";
else ids = result.join("");
return gadget.state.db.allAttachments("index_map");
})
.push(function(result){
if (Object.keys(result).length !== 0){
for (i = 0; i < Object.keys(result).length; i+=1){
promise_list.push(gadget.state.db.getAttachment("index_map", i+"", {format : "text"}));
}
return RSVP.all(promise_list);
} else {
return null;
}
})
.push(function(result){
console.log(result[0]);
if (result === null) map = "";
else map = result.join("");
console.log(map.slice(0, 1000));
return gadget.state.db.allAttachments("index_ctx");
})
.push(function(result){
if (Object.keys(result).length !== 0){
for (i = 0; i < Object.keys(result).length; i+=1){
promise_list.push(gadget.state.db.getAttachment("index_ctx", i+"", {format : "text"}));
}
return RSVP.all(promise_list);
} else {
return null;
}
})
.push(function(result){
if (result === null) ctx = "";
else ctx = result.join("");
return gadget.add_index(ids, map, ctx);
})
.push(function(result){
console.log("index imported from memory"); console.log("index imported from memory");
}) })
.push(undefined, function (my_error) {}); .push(undefined, function (my_error) {
console.log(my_error);
});
}) })
.declareMethod("is_empty", function(){ .declareMethod("is_empty", function(){
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
return; return;
} }
if (argument_list.length > 0) { if (argument_list.length > 0) {
console.log(argument_list.length);
function_used.apply(context, argument_list.shift()) function_used.apply(context, argument_list.shift())
.then(function(result) { .then(function(result) {
pushAndExecute(global_defer); pushAndExecute(global_defer);
...@@ -47,11 +48,12 @@ ...@@ -47,11 +48,12 @@
links = new DOMParser().parseFromString(links_file, "text/xml").getElementsByTagName("url"), links = new DOMParser().parseFromString(links_file, "text/xml").getElementsByTagName("url"),
links_modified = [], links_modified = [],
i; i;
console.log(file_name + " : " + links.length);
for (i=0; i<links.length; i+=1){ for (i=0; i<links.length; i+=1){
links_modified[i] = [links[i].getElementsByTagName('loc')[0].textContent]; links_modified[i] = [links[i].getElementsByTagName('loc')[0].textContent];
} }
return new RSVP.Queue().push(function() { return new RSVP.Queue().push(function() {
return dispatchQueue(gadget, gadget._get, links_modified, 1); return dispatchQueue(gadget, gadget._get, links_modified, 3);
}) })
.push(function(){ .push(function(){
return gadget.add_file(file_name); return gadget.add_file(file_name);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment