/// <summary> Read a IIS web log, analyze completely, and return the corresponding <see cref="SobekCM_Stats_DataSet"/> object </summary>
/// <param name="Log_File"> Location for the log file to read.  The file name (without extension) must hold a
/// two-digit year, month, and day at character positions 4-5, 6-7, and 8-9 (e.g. a name shaped like <c>u_ex130521.log</c>) </param>
/// <returns> Object with all the analyzed hits and sessions from the web log, or NULL if analyzing one of
/// the ASPX hits raised an exception with a non-empty message </returns>
/// <exception cref="ArgumentNullException"> Thrown when <paramref name="Log_File"/> is null </exception>
/// <exception cref="ArgumentOutOfRangeException"> Thrown when the file name is too short to contain the date, or the date parts are out of range </exception>
/// <exception cref="FormatException"> Thrown when the date portion of the file name is not numeric </exception>
public SobekCM_Stats_DataSet Read_Log(string Log_File)
{
    if (Log_File == null)
    {
        throw new ArgumentNullException("Log_File");
    }

    // Start from fresh hit and session collections; parse_line is expected to fill the hit list
    hits = new SortedList<SobekCM_Hit, SobekCM_Hit>();
    sessions = new Dictionary<string, SobekCM_Session>();

    // Create the return set
    SobekCM_Stats_DataSet returnValue = new SobekCM_Stats_DataSet();

    // Get the date of the log file from its name.  Path.GetFileNameWithoutExtension also copes
    // with names that have no extension at all, where String.Replace("", ...) would throw.
    string name = Path.GetFileNameWithoutExtension(Log_File);
    int logYear = Convert.ToInt32("20" + name.Substring(4, 2));
    int logMonth = Convert.ToInt32(name.Substring(6, 2));
    int logDay = Convert.ToInt32(name.Substring(8, 2));
    returnValue.Date = new DateTime(logYear, logMonth, logDay);

    // Read every line of the log and save each hit; the using block guarantees the
    // file handle is released even if parse_line throws
    using (StreamReader reader = new StreamReader(Log_File))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            parse_line(line);
        }
    }

    // Now, step through each hit in the list
    foreach (SobekCM_Hit hit in hits.Values)
    {
        // Anything which is not an ASPX request is counted as a web content hit
        if (hit.SobekCM_URL.IndexOf(".aspx", StringComparison.OrdinalIgnoreCase) < 0)
        {
            record_webcontent_file_hit(hit, returnValue);
            continue;
        }

        // Parse the query string (this may also replace the hit URL with the portal value)
        NameValueCollection queryStringCollection = build_query_collection(hit);

        // Now, get the navigation object using the standard SobekCM method
        try
        {
            Navigation_Object currentMode = new Navigation_Object();
            QueryString_Analyzer.Parse_Query(queryStringCollection, currentMode, hit.SobekCM_URL,
                new string[] { "en" }, Engine_ApplicationCache_Gateway.Codes, Engine_ApplicationCache_Gateway.Collection_Aliases,
                Engine_ApplicationCache_Gateway.Items, Engine_ApplicationCache_Gateway.URL_Portals, Engine_ApplicationCache_Gateway.WebContent_Hierarchy, null);
            currentMode.Set_Robot_Flag(hit.UserAgent, hit.IP);

            // Robot traffic is tallied separately and never contributes to hits or sessions
            if (currentMode.Is_Robot)
            {
                returnValue.Add_Robot_Hit();
                continue;
            }

            // Always increment the hits
            returnValue.Increment_Hits();

            // Add this IP hit
            returnValue.Add_IP_Hit(hit.IP, hit.UserAgent);

            // Increment the portal hits
            returnValue.Add_Portal_Hit(currentMode.Instance_Name.ToUpper());

            // Find the session this hit belongs to (or start a new one)
            SobekCM_Session thisSession = resolve_session(hit, returnValue);

            if ((currentMode.Mode == Display_Mode_Enum.Item_Display) ||
                (currentMode.Mode == Display_Mode_Enum.Item_Print))
            {
                record_item_hit(currentMode, thisSession, returnValue);
            }
            else
            {
                record_aggregation_hit(currentMode, thisSession, returnValue);
            }
        }
        catch (Exception ee)
        {
            // NOTE(review): any exception carrying a message abandons the whole log and
            // returns NULL; only an exception with an empty message is ignored.  This is
            // kept as-is because callers may rely on the NULL result - confirm the intent.
            if (ee.Message.Length > 0)
            {
                return null;
            }
        }
    }

    return returnValue;
}

/// <summary> Records a hit on a non-ASPX resource as a web content hit, after normalizing the URL on the hit </summary>
/// <param name="hit"> Hit to record; its URL is rewritten to the normalized (lower case, trimmed) form </param>
/// <param name="stats"> Statistics set which receives the counts </param>
private static void record_webcontent_file_hit(SobekCM_Hit hit, SobekCM_Stats_DataSet stats)
{
    const string webContentPrefix = "design/webcontent/";

    // Always increment the hits
    stats.Increment_Hits();

    // Add this IP hit
    stats.Add_IP_Hit(hit.IP, hit.UserAgent);

    string url = hit.SobekCM_URL;

    // Shouldn't start with '/' (StartsWith is safe for an empty URL, unlike indexing [0])
    if (url.StartsWith("/", StringComparison.Ordinal))
    {
        url = url.Substring(1);
    }

    url = url.ToLower();

    // Drop the leading web content folder so only the relative path is counted
    if (url.StartsWith(webContentPrefix, StringComparison.Ordinal))
    {
        url = url.Substring(webContentPrefix.Length);
    }

    hit.SobekCM_URL = url;

    // Add this as a webcontent hit
    stats.Add_WebContent_Hit(url);
}

/// <summary> Splits the lower-cased query string of a hit into a name/value collection </summary>
/// <param name="hit"> Hit whose query string is parsed; if a 'portal' value is present it replaces the URL on the hit </param>
/// <returns> Collection of all the name=value pairs which have both a name and a value </returns>
private static NameValueCollection build_query_collection(SobekCM_Hit hit)
{
    NameValueCollection collection = new NameValueCollection();

    // A missing query string is treated like an empty one
    string queryString = hit.Query_String ?? String.Empty;

    foreach (string pair in queryString.ToLower().Split('&'))
    {
        // Skip anything without both a name before and a value after the '='
        int equalsIndex = pair.IndexOf('=');
        if ((equalsIndex <= 0) || (equalsIndex >= pair.Length - 1))
        {
            continue;
        }

        string queryName = pair.Substring(0, equalsIndex);
        string queryValue = pair.Substring(equalsIndex + 1);
        collection[queryName] = queryValue;

        // The name is already lower case, since the whole query string was lowered above
        if (queryName == "portal")
        {
            hit.SobekCM_URL = queryValue;
        }
    }

    return collection;
}

/// <summary> Gets the session a hit belongs to, starting (and counting) a new session when the IP
/// address has not been seen before or its previous hit was 60 or more minutes earlier </summary>
/// <param name="hit"> Hit to assign to a session </param>
/// <param name="stats"> Statistics set whose session count is incremented for each new session </param>
/// <returns> Existing or newly created session for the IP address of the hit </returns>
private SobekCM_Session resolve_session(SobekCM_Hit hit, SobekCM_Stats_DataSet stats)
{
    const int sessionTimeoutMinutes = 60;

    // Check for pre-existing session which is still active
    SobekCM_Session existingSession;
    if (sessions.TryGetValue(hit.IP, out existingSession))
    {
        TimeSpan difference = hit.Time.Subtract(existingSession.Last_Hit);
        if (difference.TotalMinutes < sessionTimeoutMinutes)
        {
            existingSession.Last_Hit = hit.Time;
            return existingSession;
        }
    }

    // Either the first hit from this IP, or the earlier session timed out
    SobekCM_Session newSession = new SobekCM_Session(hit.IP, hit.Time);
    sessions[hit.IP] = newSession;
    stats.Increment_Sessions();
    return newSession;
}

/// <summary> Records an item display/print hit as either an item-level hit or a title (bib) level hit </summary>
/// <param name="currentMode"> Navigation object built from the hit; its item id may be filled in from the BibID/VID lookup </param>
/// <param name="thisSession"> Session the hit belongs to </param>
/// <param name="stats"> Statistics set which receives the counts </param>
private void record_item_hit(Navigation_Object currentMode, SobekCM_Session thisSession, SobekCM_Stats_DataSet stats)
{
    bool hasItemId = (currentMode.ItemID_DEPRECATED.HasValue) && (currentMode.ItemID_DEPRECATED > 0);
    bool hasBibAndVid = (!String.IsNullOrEmpty(currentMode.VID)) && (!String.IsNullOrEmpty(currentMode.BibID));

    if (hasItemId || hasBibAndVid)
    {
        // Without a usable item id, try to look it up from the BibID:VID pair
        if ((!currentMode.ItemID_DEPRECATED.HasValue) || (currentMode.ItemID_DEPRECATED < 0))
        {
            string bibVidKey = currentMode.BibID + ":" + currentMode.VID;
            if (bib_vid_itemid_dictionary.ContainsKey(bibVidKey))
            {
                currentMode.ItemID_DEPRECATED = bib_vid_itemid_dictionary[bibVidKey];
            }
        }

        // -1 is passed along when the item id is still unknown
        int itemid = currentMode.ItemID_DEPRECATED.HasValue ? currentMode.ItemID_DEPRECATED.Value : -1;

        stats.Add_Item_Hit(itemid, currentMode.BibID,
            currentMode.VID, currentMode.ViewerCode,
            currentMode.Text_Search, thisSession.SessionID);
    }
    else if (!String.IsNullOrEmpty(currentMode.BibID))
    {
        stats.Add_Bib_Hit(currentMode.BibID.ToUpper(), thisSession.SessionID);
    }
}

/// <summary> Records a non-item hit against its collection or institution, plus any web content
/// page and special writer type (XML, OAI, JSON) counts </summary>
/// <param name="currentMode"> Navigation object built from the hit </param>
/// <param name="thisSession"> Session the hit belongs to </param>
/// <param name="stats"> Statistics set which receives the counts </param>
private static void record_aggregation_hit(Navigation_Object currentMode, SobekCM_Session thisSession, SobekCM_Stats_DataSet stats)
{
    string code = currentMode.Aggregation;
    string institution = String.Empty;

    // Aggregation codes which begin with 'I' are counted as institutions rather than collections
    if ((!String.IsNullOrEmpty(code)) && (code.ToUpper()[0] == 'I'))
    {
        institution = code;
        code = String.Empty;
    }

    // Was this a collection level hit?
    if (!String.IsNullOrEmpty(code))
    {
        stats.Add_Collection_Hit(code.ToLower(), currentMode.Mode, currentMode.Aggregation_Type, thisSession.SessionID);
    }

    // Was this an institutional level hit?
    if (!String.IsNullOrEmpty(institution))
    {
        stats.Add_Institution_Hit(institution.ToLower(), currentMode.Mode, currentMode.Aggregation_Type, thisSession.SessionID);
    }

    // Is this a static "webcontent" top-level page?
    if ((currentMode.Mode == Display_Mode_Enum.Simple_HTML_CMS) &&
        (currentMode.Info_Browse_Mode != "unknown") &&
        (currentMode.Info_Browse_Mode != "default"))
    {
        stats.Add_WebContent_Hit(currentMode.Info_Browse_Mode.ToLower());
    }

    // Add the writer type, if not normal HTML stuff
    switch (currentMode.Writer_Type)
    {
        case Writer_Type_Enum.DataSet:
        case Writer_Type_Enum.XML:
            stats.Add_XML_Hit();
            break;

        case Writer_Type_Enum.OAI:
            stats.Add_OAI_Hit();
            break;

        case Writer_Type_Enum.JSON:
            stats.Add_JSON_Hit();
            break;
    }
}